###########################################################################
## Gov 2001, Spring 2012
## Hummy Song and Steven Hoffman
## Replication Code
## Last updated April 25, 2012
###########################################################################

###########################################################################
##
## TABLE OF CONTENTS
##
## A) Replication of Exhibit 1
## B) Replication of Exhibit 2 and Exhibit 3
## C) Imbalance in original analysis (2008 data)
## D) CEM with listwise deletion for missing data (2008 data)
## E) CEM with multiple imputation for missing data (2008 data)
## F) Original model on other years' data (2007 data)
## G) Closest possible model on other years' data (2007 data) 
## H) Closest possible model on other years' data (2008 data) 
## I) Closest possible model on other years' data (2009 data) 
## J) CEM with multiple imputation for missing data (2007 data, closest possible model)
## K) CEM with multiple imputation for missing data (2008 data, closest possible model)
## L) CEM with multiple imputation for missing data (2009 data, closest possible model)
##
## NOTE: Code in all sections is self-contained after loading all of the 
##       packages listed below. 
###########################################################################

library(foreign)
library(survey)
library(Zelig)
library(xtable)
library(cem)
library(MatchIt)
library(Amelia)
library(mvtnorm)

###########################################################################
## A) Replication of Exhibit 1
###########################################################################

ls()
rm(list = ls())
data <- read.dta("namcs08.dta")
dstrat <- svydesign(id=~cpsum,strata=~cstratm, weights=~patwt, data=data)
class(data)
summary(dstrat)

Ex1<-matrix(NA,27,5)

# Column 1: tabulate
Ex1[,1]<-c(
  sum(data$sex == 1) #tabulate females (16751),
  ,sum(data$sex != 1) #tabulate males (11990),
  ,sum(data$age < 18) # tabulate patients < 18 (5180)
  ,sum(data$age>=18 & data$age<=45) # tabulate patients 18-45 (7945)
  ,sum(data$age>45 & data$age <=64) # tabulate patients 45-64 (8405)
  ,sum(data$age>64) # tabulate patients > 64 (7211)
  ,sum(data$raceim==2) #black
  ,sum(data$raceim!=2) #non-black
  ,sum(data$paytyper==1) #private
  ,sum(data$paytyper==2) #medicare
  ,sum(data$paytyper==3) #medicaid/schip
  ,sum(data$paytyper==4 |data$paytyper==7 ) #other
  ,sum(data$paytyper==5 |data$paytyper==6 ) #none
  ,sum(data$speccat==1) #primary care
  ,sum(data$speccat==2) #surgical 
  ,sum(data$speccat==3) #medical specialty
  ,sum(data$retypoff==1 | data$retypoff==8) #private office 
  ,sum(data$retypoff==3) #community health center (missing 10, likely typo)
  ,sum(data$retypoff==7) #hmo
  ,sum(data$retypoff==2) #free standing clinic
  ,sum(data$retypoff==4) + sum(data$retypoff==5) +sum(data$retypoff==6) + sum(data$retypoff==9) 
  ,sum(data$empstat==1) # physician owns 
  ,sum(data$empstat==2|data$empstat==3) # physician employee or contractor
  ,sum(data$eimgres==1) # has system
  ,sum(data$eimgres==2|data$eimgres==4) # no system or turned off
  ,sum(data$eimage==1)
  ,sum(data$eimage==2|data$eimage==4 |data$eimage==-7) # no system or turned off or not applicable
    )

# Column 2: any image
Col2.list<-list(  
  svyratio(~anyimage==1 & sex==1, ~sex==1, dstrat)
  ,svyratio(~anyimage==1 & sex!=1, ~sex!=1, dstrat)
  ,svyratio(~age<18&anyimage==1,~age<18,dstrat)
  ,svyratio(~age>=18&age<=45&anyimage==1,~age>=18&age<=45,dstrat)
  ,svyratio(~age>45&age<=64&anyimage==1,~age>45&age<=64,dstrat)
  ,svyratio(~age>64&anyimage==1,~age>64,dstrat)
  ,svyratio(~anyimage==1&raceim==2,~raceim==2,dstrat)
  ,svyratio(~anyimage==1&raceim!=2,~raceim!=2,dstrat)
  ,svyratio(~anyimage==1&paytyper==1,~paytyper==1,dstrat)
  ,svyratio(~anyimage==1&paytyper==2,~paytyper==2,dstrat)
  ,svyratio(~anyimage==1&paytyper==3,~paytyper==3,dstrat)
  ,svyratio(~anyimage==1&(paytyper==4|paytyper==7),~(paytyper==4|paytyper==7),dstrat)
  ,svyratio(~anyimage==1&(paytyper==5|paytyper==6),~(paytyper==5|paytyper==6),dstrat)
  ,svyratio(~anyimage==1&speccat==1,~speccat==1,dstrat)
  ,svyratio(~anyimage==1&speccat==2,~speccat==2,dstrat)
  ,svyratio(~anyimage==1&speccat==3,~speccat==3,dstrat)
  ,svyratio(~anyimage==1&(retypoff==1 | retypoff==8),~(retypoff==1 | retypoff==8),dstrat)
  ,svyratio(~anyimage==1&retypoff==3 ,~retypoff==3 ,dstrat)
  ,svyratio(~anyimage==1&retypoff==7,~retypoff==7,dstrat)
  ,svyratio(~anyimage==1&retypoff==2,~retypoff==2,dstrat)
  ,svyratio(~anyimage==1&(retypoff==4|retypoff==5|retypoff==6|retypoff==9),~(retypoff==4|retypoff==5|retypoff==6|retypoff==9),dstrat)
  ,svyratio(~anyimage==1 & empstat==1, ~empstat==1, dstrat)
  ,svyratio(~anyimage==1 & (empstat==2|empstat==3),~(empstat==2|empstat==3),dstrat)
  ,svyratio(~anyimage==1& eimgres==1, ~eimgres==1, dstrat)
  ,svyratio(~anyimage==1& (eimgres==2|eimgres==4), ~(eimgres==2|eimgres==4), dstrat)
  ,svyratio(~anyimage==1&eimage==1, ~eimage==1,dstrat)
  ,svyratio(~anyimage==1&(eimage==2|eimage==4|eimage==-7), ~(eimage==2|eimage==4|eimage==-7),dstrat)
  )

for(i in 1:length(Col2.list)){
  Ex1[i,2]<-round(100*Col2.list[[i]][[1]],1)
    }

# Column 3: advanced imaging
Col3.list<-list(
  svyratio(~(catscan==1 | mri==1 | petscan==1)&sex==1,~sex==1,dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&sex!=1,~sex!=1,dstrat)
  ,svyratio(~age<18&(catscan==1 | mri==1 | petscan==1),~age<18,dstrat)
  ,svyratio(~age>=18&age<=45&(catscan==1 | mri==1 | petscan==1),~age>=18&age<=45,dstrat)
  ,svyratio(~age>45&age<=64&(catscan==1 | mri==1 | petscan==1),~age>45&age<=64,dstrat)
  ,svyratio(~age>64&(catscan==1 | mri==1 | petscan==1),~age>64,dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&raceim==2,~raceim==2,dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&raceim!=2,~raceim!=2,dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&paytyper==1,~paytyper==1,dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&paytyper==2,~paytyper==2,dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&paytyper==3,~paytyper==3,dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&(paytyper==4|paytyper==7),~(paytyper==4|paytyper==7),dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&(paytyper==5|paytyper==6),~(paytyper==5|paytyper==6),dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&speccat==1,~speccat==1,dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&speccat==2,~speccat==2,dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&speccat==3,~speccat==3,dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&(retypoff==1 | retypoff==8),~(retypoff==1 | retypoff==8),dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&retypoff==3 ,~retypoff==3 ,dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&retypoff==7,~retypoff==7,dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&retypoff==2,~retypoff==2,dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&(retypoff==4|retypoff==5|retypoff==6|retypoff==9),~(retypoff==4|retypoff==5|retypoff==6|retypoff==9),dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1) & empstat==1, ~empstat==1, dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1) & (empstat==2|empstat==3),~(empstat==2|empstat==3),dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)& eimgres==1, ~eimgres==1, dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)& (eimgres==2|eimgres==4), ~(eimgres==2|eimgres==4), dstrat) # this one is a bit off 
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&eimage==1, ~eimage==1,dstrat)
  ,svyratio(~(catscan==1 | mri==1 | petscan==1)&(eimage==2|eimage==4|eimage==-7), ~(eimage==2|eimage==4|eimage==-7),dstrat)
    )
for(i in 1:length(Col3.list)){
  Ex1[i,3]<-round(100*Col3.list[[i]][[1]],1)
}

# Column 4: MRI
Col4.list<-list(
  svyratio(~mri==1 & sex==1, ~sex==1, dstrat)
  ,svyratio(~mri==1 & sex!=1, ~sex!=1, dstrat)
  ,svyratio(~age<18&mri==1,~age<18,dstrat)
  ,svyratio(~age>=18&age<=45&mri==1,~age>=18&age<=45,dstrat)
  ,svyratio(~age>45&age<=64&mri==1,~age>45&age<=64,dstrat)
  ,svyratio(~age>64&mri==1,~age>64,dstrat)
  ,svyratio(~mri==1&raceim==2,~raceim==2,dstrat)
  ,svyratio(~mri==1&raceim!=2,~raceim!=2,dstrat)
  ,svyratio(~mri==1&paytyper==1,~paytyper==1,dstrat)
  ,svyratio(~mri==1&paytyper==2,~paytyper==2,dstrat)
  ,svyratio(~mri==1&paytyper==3,~paytyper==3,dstrat)
  ,svyratio(~mri==1&(paytyper==4|paytyper==7),~(paytyper==4|paytyper==7),dstrat)
  ,svyratio(~mri==1&(paytyper==5|paytyper==6),~(paytyper==5|paytyper==6),dstrat)
  ,svyratio(~mri==1&speccat==1,~speccat==1,dstrat)
  ,svyratio(~mri==1&speccat==2,~speccat==2,dstrat)
  ,svyratio(~mri==1&speccat==3,~speccat==3,dstrat)
  ,svyratio(~mri==1&(retypoff==1 | retypoff==8),~(retypoff==1 | retypoff==8),dstrat)
  ,svyratio(~mri==1&retypoff==3 ,~retypoff==3 ,dstrat)
  ,svyratio(~mri==1&retypoff==7,~retypoff==7,dstrat)
  ,svyratio(~mri==1&retypoff==2,~retypoff==2,dstrat)
  ,svyratio(~mri==1&(retypoff==4|retypoff==5|retypoff==6|retypoff==9),~(retypoff==4|retypoff==5|retypoff==6|retypoff==9),dstrat)
  ,svyratio(~mri==1 & empstat==1, ~empstat==1, dstrat)
  ,svyratio(~mri==1 & (empstat==2|empstat==3),~(empstat==2|empstat==3),dstrat)
  ,svyratio(~mri==1& eimgres==1, ~eimgres==1, dstrat)
  ,svyratio(~mri==1& (eimgres==2|eimgres==4), ~(eimgres==2|eimgres==4), dstrat)
  ,svyratio(~mri==1&eimage==1, ~eimage==1,dstrat)
  ,svyratio(~mri==1&(eimage==2|eimage==4|eimage==-7), ~(eimage==2|eimage==4|eimage==-7),dstrat)
    )
for(i in 1:length(Col4.list)){
  Ex1[i,4]<-round(100*Col4.list[[i]][[1]],1)
}

# Column 5: CT
Col5.list<-list(
  svyratio(~catscan==1 & sex==1, ~sex==1, dstrat)
  ,svyratio(~catscan==1 & sex!=1, ~sex!=1, dstrat)
  ,svyratio(~age<18&catscan==1,~age<18,dstrat)
  ,svyratio(~age>=18&age<=45&catscan==1,~age>=18&age<=45,dstrat)
  ,svyratio(~age>45&age<=64&catscan==1,~age>45&age<=64,dstrat)
  ,svyratio(~age>64&catscan==1,~age>64,dstrat)
  ,svyratio(~catscan==1&raceim==2,~raceim==2,dstrat)
  ,svyratio(~catscan==1&raceim!=2,~raceim!=2,dstrat)
  ,svyratio(~catscan==1&paytyper==1,~paytyper==1,dstrat)
  ,svyratio(~catscan==1&paytyper==2,~paytyper==2,dstrat)
  ,svyratio(~catscan==1&paytyper==3,~paytyper==3,dstrat)
  ,svyratio(~catscan==1&(paytyper==4|paytyper==7),~(paytyper==4|paytyper==7),dstrat)
  ,svyratio(~catscan==1&(paytyper==5|paytyper==6),~(paytyper==5|paytyper==6),dstrat)
  ,svyratio(~catscan==1&speccat==1,~speccat==1,dstrat)
  ,svyratio(~catscan==1&speccat==2,~speccat==2,dstrat)
  ,svyratio(~catscan==1&speccat==3,~speccat==3,dstrat)
  ,svyratio(~catscan==1&(retypoff==1 | retypoff==8),~(retypoff==1 | retypoff==8),dstrat)
  ,svyratio(~catscan==1&retypoff==3 ,~retypoff==3 ,dstrat)
  ,svyratio(~catscan==1&retypoff==7,~retypoff==7,dstrat)
  ,svyratio(~catscan==1&retypoff==2,~retypoff==2,dstrat)
  ,svyratio(~catscan==1&(retypoff==4|retypoff==5|retypoff==6|retypoff==9),~(retypoff==4|retypoff==5|retypoff==6|retypoff==9),dstrat)
  ,svyratio(~catscan==1 & empstat==1, ~empstat==1, dstrat)
  ,svyratio(~catscan==1 & (empstat==2|empstat==3),~(empstat==2|empstat==3),dstrat)
  ,svyratio(~catscan==1& eimgres==1, ~eimgres==1, dstrat)
  ,svyratio(~catscan==1& (eimgres==2|eimgres==4), ~(eimgres==2|eimgres==4), dstrat)
  ,svyratio(~catscan==1&eimage==1, ~eimage==1,dstrat)
  ,svyratio(~catscan==1&(eimage==2|eimage==4|eimage==-7), ~(eimage==2|eimage==4|eimage==-7),dstrat)
    )
for(i in 1:length(Col5.list)){
  Ex1[i,5]<-round(100*Col5.list[[i]][[1]],1)
}

colnames(Ex1)<-c("n=","Any image","Any advanced imaging","MRI","CT")
rownames(Ex1)<-c("Female","Male","<18","18-45","46-64",">64","Black","Nonblack","Private","Medicare","Medicaid","Other","None","Primary care","Surgical specialty","Medical specialty","Private office","Community health center","Health maintenance organization","Free-standing clinic","Other office","Yes","No","Yes","No","Yes","No")

#Summarized output for Replication of Exhibit 1
Ex1   								#Exhibit 1


###########################################################################
## B) Replication of Exhibit 2 and Exhibit 3
###########################################################################

ls()
rm(list = ls())
data <- read.dta("namcs08.dta")
dstrat <- svydesign(id=~cpsum,strata=~cstratm, weights=~patwt, data=data)

data$gender <- NA
data$gender[data$sex==2]<-0 #n=11990
data$gender[data$sex==1]<-1 #n=16751

data$recodedage <- NA
data$recodedage[data$age < 18] <- 0 #n=5180
data$recodedage[data$age >= 18 & data$age <= 45] <- 1 #n=7945
data$recodedage[data$age > 45 & data$age <= 64] <- 2 #n=8405
data$recodedage[data$age > 64] <- 3 #n=7211

data$black <- NA
data$black[data$raceim==2] <- 1 #n=3384
data$black[data$raceim!=2] <- 0 #n=25357

data$hispanic <- NA
data$hispanic[data$ethim==1]<-1 #n=3891
data$hispanic[data$ethim==2]<-0 #n=24850

data$highpoverty <- NA
data$highpoverty[data$pctpovr==3 | data$pctpovr==4] <- 1 #n=12395
data$highpoverty[data$pctpovr==1 | data$pctpovr==2 | data$pctpovr==-9] <- 0 #n=16346

data$urban <- NA
data$urban[data$msa==1] <- 1  #n=26105
data$urban[data$msa==2] <- 0  #n=2636

data$established <- NA
data$established[data$senbefor==1] <- 1 #n=24535
data$established[data$senbefor==2] <- 0 #n=4206

data$insurancetype <- NA
data$insurancetype[data$paytyper==1] <- 0 #private ... n=14507
data$insurancetype[data$paytyper==2] <- 1 #medicare ... n=6689
data$insurancetype[data$paytyper==3] <- 2 #medicaid ... n=3617
data$insurancetype[data$paytyper==4|data$paytyper==7] <- 3 #other...n=1057
data$insurancetype[data$paytyper==5|data$paytyper==6] <- 4 #none...n=1697

data$specialty <- NA
data$specialty[data$speccat==2] <- 0 #surgical ... n=6996
data$specialty[data$speccat==1] <- 1 #pcp ... n=14701
data$specialty[data$speccat==3] <- 2 #med spec ... n=7044

data$practice <- NA
data$practice[data$retypoff==1|data$retypoff==8] <- 0 #priv office..n=23669
data$practice[data$retypoff==3] <- 1 #comm health center...n=3345
data$practice[data$retypoff==7] <- 2 #hmo...n=645
data$practice[data$retypoff==2] <- 3 #free standing clnic...n=842
data$practice[data$retypoff==4|data$retypoff==5|data$retypoff==6] <-4 #other...n=240

data$solopractice <- NA
data$solopractice[data$solo==1] <- 1
data$solopractice[data$solo==2] <- 0

data$ownership <- NA
data$ownership[data$empstat==1] <- 1
data$ownership[data$empstat==2|data$empstat==3] <- 0

data$prepaid <- 0
data$prepaid[data$retypoff==7|data$owns==2] <- 1
data$prepaid[data$revcapr==3|data$revcapr==4|data$revcaser==3|data$revcaser==4|(data$revcapr==2&data$revcaser==2)] <- 1 

data$hospitalowned <- 0
data$hospitalowned[data$owns==4|data$owns==5] <- 1
data$hospitalowned[data$owns==-9] <- NA 

data$profiling <- NA
data$profiling[data$pccpprof==1] <- 1
data$profiling[data$pccpprof==2] <- 0

data$compresults <- NA
data$compresults[data$eimgres==1] <- 1
data$compresults[data$eimgres==2|data$eimgres==4] <- 0

data$compimages <- NA
data$compimages[data$eimage==1] <- 1
data$compimages[data$eimage==2|data$eimage==4|data$eimage==-7] <- 0

data$advancedimage <- 0
data$advancedimage[data$catscan==1|data$mri==1|data$petscan==1] <- 1

# Exhibit 2 Regression a (Regression 1)
ex2a <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(profiling)+factor(compresults), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex2a.coef <- summary(ex2a)$coef
ex2a.ests <- exp(ex2a.coef[,1])
ex2a.lower.ci <- exp(ex2a.coef[,1] - 1.96 * ex2a.coef[,2])
ex2a.upper.ci <- exp(ex2a.coef[,1] + 1.96 * ex2a.coef[,2])
ex2a.coef.ci <- cbind(ex2a.ests, ex2a.lower.ci, ex2a.upper.ci)
ex2a.coef.ci

# Exhibit 2 Regression b (Regression 2)
ex2b <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(profiling)+factor(compimages), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
#exp(ex2b$coefficients)
ex2b.coef <- summary(ex2b)$coef
ex2b.ests <- exp(ex2b.coef[,1])
ex2b.lower.ci <- exp(ex2b.coef[,1] - 1.96 * ex2b.coef[,2])
ex2b.upper.ci <- exp(ex2b.coef[,1] + 1.96 * ex2b.coef[,2])
ex2b.coef.ci <- cbind(ex2b.ests, ex2b.lower.ci, ex2b.upper.ci)
ex2b.coef.ci

# Authors' results
authors.ex2a <- as.matrix(c(1.44, 2.69, 3.82, 3.13, 0.87, 0.89, 0.94, 1.07, 0.53, 0.99, 0.86, 1.56, 0.69, 0.77, 0.73, 0.64, 0.38, 0.54, 1.36, 0.63, 0.86, 0.99, 0.93, 1.03, 1.40))
authors.ex2b <- as.matrix(c(1.42, 2.89, 4.20, 3.24, 0.86, 0.88, 0.93, 1.07, 0.55, 0.98, 0.87, 1.71, 0.62, 0.80, 0.70, 0.67, 0.39, 0.80, 0.82, 0.62, 0.85, 0.98, 1.01, 1.09, 1.45))

# Replicated results
ours.ex2a <- as.matrix(exp(ex2a$coefficients[-1]))
ours.ex2b <- as.matrix(exp(ex2b$coefficients[-1]))

# Combined
exhibit2repl <- cbind(authors.ex2a, ours.ex2a, authors.ex2b, ours.ex2b)
round(exhibit2repl, digits = 2)

# Exhibit 3 Regression a (Regression 3)
ex3a <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(profiling)+factor(compresults), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex3a.coef <- summary(ex3a)$coef
ex3a.ests <- exp(ex3a.coef[,1])
ex3a.lower.ci <- exp(ex3a.coef[,1] - 1.96 * ex3a.coef[,2])
ex3a.upper.ci <- exp(ex3a.coef[,1] + 1.96 * ex3a.coef[,2])
ex3a.coef.ci <- cbind(ex3a.ests, ex3a.lower.ci, ex3a.upper.ci)
ex3a.coef.ci

# Exhibit 3 Regression b (Regression 4)
ex3b <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(profiling)+factor(compimages), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex3b.coef <- summary(ex3b)$coef
ex3b.ests <- exp(ex3b.coef[,1])
ex3b.lower.ci <- exp(ex3b.coef[,1] - 1.96 * ex3b.coef[,2])
ex3b.upper.ci <- exp(ex3b.coef[,1] + 1.96 * ex3b.coef[,2])
ex3b.coef.ci <- cbind(ex3b.ests, ex3b.lower.ci, ex3b.upper.ci)
ex3b.coef.ci

# Authors' results
authors.ex3a <- as.matrix(c(0.97, 4.10, 6.34, 5.28, 1.11, 0.81, 0.97, 1.05, 0.55, 1.07, 1.15, 1.66, 1.00, 0.63, 1.01, 1.27, 1.56, 0.73, 1.81, 0.60, 1.44, 0.86, 2.49, 1.24, 1.71))
authors.ex3b <- as.matrix(c(0.95, 3.83, 6.44, 5.21, 1.04, 0.82, 1.04, 1.02, 0.56, 1.03, 1.13, 2.03, 1.02, 0.61, 0.97, 1.34, 1.23, 0.87, 0.41, 0.61, 1.46, 1.00, 2.75, 1.30, 1.78))

# Replicated results
ours.ex3a <- as.matrix(exp(ex3a$coefficients[-1]))
ours.ex3b <- as.matrix(exp(ex3b$coefficients[-1]))

# Combined
exhibit3repl <- cbind(authors.ex3a, ours.ex3a, authors.ex3b, ours.ex3b)
round(exhibit3repl, digits = 2)

# Summarized output for Replication of Exhibit 2 and Exhibit 3
summary(ex2a)    					#Exhibit 2 significance levels
round(ex2a.coef.ci, digits = 2)		#Exhibit 2 confidence intervals
summary(ex2b)    					#Exhibit 2 significance levels
round(ex2b.coef.ci, digits = 2)		#Exhibit 2 confidence intervals
summary(ex3a)    					#Exhibit 3 significance levels
round(ex3a.coef.ci, digits = 2)		#Exhibit 3 confidence intervals
summary(ex3b)    					#Exhibit 3 significance levels
round(ex3b.coef.ci, digits = 2)		#Exhibit 3 confidence intervals


###########################################################################
## C) Imbalance in original analysis (2008 data)
###########################################################################

ls()
rm(list = ls())
data <- read.dta("namcs08.dta")
dstrat <- svydesign(id=~cpsum,strata=~cstratm, weights=~patwt, data=data)
data$gender <- NA
data$gender[data$sex==2]<-0 
data$gender[data$sex==1]<-1 

data$recodedage <- NA
data$recodedage[data$age < 18] <- 0
data$recodedage[data$age >= 18 & data$age <= 45] <- 1
data$recodedage[data$age > 45 & data$age <= 64] <- 2
data$recodedage[data$age > 64] <- 3

data$black <- NA
data$black[data$raceim==2] <- 1
data$black[data$raceim!=2] <- 0

data$hispanic <- NA
data$hispanic[data$ethim==1]<-1
data$hispanic[data$ethim==2]<-0

data$highpoverty <- NA
data$highpoverty[data$pctpovr==3 | data$pctpovr==4] <- 1
data$highpoverty[data$pctpovr==1 | data$pctpovr==2 | data$pctpovr==-9] <- 0

data$urban <- NA
data$urban[data$msa==1] <- 1
data$urban[data$msa==2] <- 0

data$established <- NA
data$established[data$senbefor==1] <- 1
data$established[data$senbefor==2] <- 0

data$insurancetype <- NA
data$insurancetype[data$paytyper==1] <- 0
data$insurancetype[data$paytyper==2] <- 1
data$insurancetype[data$paytyper==3] <- 2
data$insurancetype[data$paytyper==4|data$paytyper==7] <- 3
data$insurancetype[data$paytyper==5|data$paytyper==6] <- 4

data$specialty <- NA
data$specialty[data$speccat==2] <- 0
data$specialty[data$speccat==1] <- 1
data$specialty[data$speccat==3] <- 2

data$practice <- NA
data$practice[data$retypoff==1|data$retypoff==8] <- 0
data$practice[data$retypoff==3] <- 1
data$practice[data$retypoff==7] <- 2
data$practice[data$retypoff==2] <- 3
data$practice[data$retypoff==4|data$retypoff==5|data$retypoff==6] <-4

data$solopractice <- NA
data$solopractice[data$solo==1] <- 1
data$solopractice[data$solo==2] <- 0

data$ownership <- NA
data$ownership[data$empstat==1] <- 1
data$ownership[data$empstat==2|data$empstat==3] <- 0

data$prepaid <- 0
data$prepaid[data$retypoff==7|data$owns==2] <- 1
data$prepaid[data$revcapr==3|data$revcapr==4|data$revcaser==3|data$revcaser==4|(data$revcapr==2&data$revcaser==2)] <- 1 

data$hospitalowned <- 0
data$hospitalowned[data$owns==4|data$owns==5] <- 1
data$hospitalowned[data$owns==-9] <- NA 

data$profiling <- NA
data$profiling[data$pccpprof==1] <- 1
data$profiling[data$pccpprof==2] <- 0

data$compresults <- NA
data$compresults[data$eimgres==1] <- 1
data$compresults[data$eimgres==2|data$eimgres==4] <- 0

data$compimages <- NA
data$compimages[data$eimage==1] <- 1
data$compimages[data$eimage==2|data$eimage==4|data$eimage==-7] <- 0

data$advancedimage <- 0
data$advancedimage[data$catscan==1|data$mri==1|data$petscan==1] <- 1

# Regressions 1 and 3
#subset data
reg13.data.sub <- data[,c("anyimage","advancedimage","gender","recodedage","black","hispanic","highpoverty","urban","established","insurancetype","specialty","practice","solopractice","ownership","prepaid","hospitalowned","profiling","compresults","patwt","cpsum","cstratm")] 
dim(reg13.data.sub)

#balance w/ no deletion
reg13.imbalance.full <- imbalance(group=reg13.data.sub$compresults, data=reg13.data.sub, drop=c("compresults","anyimage","advancedimage","cstratm","patwt","cpsum"))
reg13.imbalance.full
# Multivariate Imbalance Measure: L1=0.572
# Percentage of local common support: LCS=22.4%

table.reg13.imbalance.full <- as.matrix(reg13.imbalance.full[[1]])
table.reg13.imbalance.full <- table.reg13.imbalance.full[,c(1,3:8)]
table.reg13.imbalance.full <- apply(table.reg13.imbalance.full,2,as.numeric)
table.reg13.imbalance.full <- round(table.reg13.imbalance.full, 3)
rownames(table.reg13.imbalance.full) <- rownames(reg13.imbalance.full[[1]])
table.reg13.imbalance.full

#listwise deletion
reg13.data.sub.ld <- na.omit(reg13.data.sub)
dim(reg13.data.sub.ld)
# 5820 obs deleted

# balance w/ listwise deletion
reg13.imbalance.ld <- imbalance(group=reg13.data.sub.ld$compresults, data=reg13.data.sub.ld, drop=c("compresults","anyimage","advancedimage","cstratm","patwt","cpsum"))
reg13.imbalance.ld
# Multivariate Imbalance Measure: L1=0.530
# Percentage of local common support: LCS=26.3%

table.reg13.imbalance.ld <- as.matrix(reg13.imbalance.ld[[1]])
table.reg13.imbalance.ld <- table.reg13.imbalance.ld[,c(1,3:8)]
table.reg13.imbalance.ld <- apply(table.reg13.imbalance.ld,2,as.numeric)
table.reg13.imbalance.ld <- round(table.reg13.imbalance.ld, 3)
rownames(table.reg13.imbalance.ld) <- rownames(reg13.imbalance.ld[[1]])
table.reg13.imbalance.ld

# Regression 2 and 4
#subset data
reg24.data.sub <- data[,c("anyimage","advancedimage","gender","recodedage","black","hispanic","highpoverty","urban","established","insurancetype","specialty","practice","solopractice","ownership","prepaid","hospitalowned","profiling","compimages","patwt","cpsum","cstratm")] 
dim(reg24.data.sub)

#balance w/ no deletion
reg24.imbalance.full <- imbalance(group=reg24.data.sub$compimages, data=reg24.data.sub, drop=c("compimages","anyimage","advancedimage","cstratm","patwt","cpsum"))
reg24.imbalance.full
# Multivariate Imbalance Measure: L1=0.612
# Percentage of local common support: LCS=15.8%

table.reg24.imbalance.full <- as.matrix(reg24.imbalance.full[[1]])
table.reg24.imbalance.full <- table.reg24.imbalance.full[,c(1,3:8)]
table.reg24.imbalance.full <- apply(table.reg24.imbalance.full,2,as.numeric)
table.reg24.imbalance.full <- round(table.reg24.imbalance.full, 3)
rownames(table.reg24.imbalance.full) <- rownames(reg24.imbalance.full[[1]])
table.reg24.imbalance.full

#listwise deletion
reg24.data.sub.ld <- na.omit(reg24.data.sub)
dim(reg24.data.sub.ld)
# 8215 obs deleted

# balance w/ listwise deletion
reg24.imbalance.ld <- imbalance(group=reg24.data.sub.ld$compimages, data=reg24.data.sub.ld, drop=c("compimages","anyimage","advancedimage","cstratm","patwt","cpsum"))
reg24.imbalance.ld
# Multivariate Imbalance Measure: L1=0.570
# Percentage of local common support: LCS=20.6%

table.reg24.imbalance.ld <- as.matrix(reg24.imbalance.ld[[1]])
table.reg24.imbalance.ld <- table.reg24.imbalance.ld[,c(1,3:8)]
table.reg24.imbalance.ld <- apply(table.reg24.imbalance.ld,2,as.numeric)
table.reg24.imbalance.ld <- round(table.reg24.imbalance.ld, 3)
rownames(table.reg24.imbalance.ld) <- rownames(reg24.imbalance.ld[[1]])
table.reg24.imbalance.ld


###########################################################################
## D) CEM with listwise deletion for missing data (2008 data)
###########################################################################

ls()
rm(list = ls())
data <- read.dta("namcs08.dta")
dstrat <- svydesign(id=~cpsum,strata=~cstratm, weights=~patwt, data=data)

data$gender <- NA
data$gender[data$sex==2]<-0
data$gender[data$sex==1]<-1

data$recodedage <- NA
data$recodedage[data$age < 18] <- 0
data$recodedage[data$age >= 18 & data$age <= 45] <- 1
data$recodedage[data$age > 45 & data$age <= 64] <- 2
data$recodedage[data$age > 64] <- 3

data$black <- NA
data$black[data$raceim==2] <- 1
data$black[data$raceim!=2] <- 0

data$hispanic <- NA
data$hispanic[data$ethim==1]<-1
data$hispanic[data$ethim==2]<-0

data$highpoverty <- NA
data$highpoverty[data$pctpovr==3 | data$pctpovr==4] <- 1
data$highpoverty[data$pctpovr==1 | data$pctpovr==2 | data$pctpovr==-9] <- 0

data$urban <- NA
data$urban[data$msa==1] <- 1
data$urban[data$msa==2] <- 0

data$established <- NA
data$established[data$senbefor==1] <- 1
data$established[data$senbefor==2] <- 0

data$insurancetype <- NA
data$insurancetype[data$paytyper==1] <- 0
data$insurancetype[data$paytyper==2] <- 1
data$insurancetype[data$paytyper==3] <- 2
data$insurancetype[data$paytyper==4|data$paytyper==7] <- 3
data$insurancetype[data$paytyper==5|data$paytyper==6] <- 4

data$specialty <- NA
data$specialty[data$speccat==2] <- 0
data$specialty[data$speccat==1] <- 1
data$specialty[data$speccat==3] <- 2

data$practice <- NA
data$practice[data$retypoff==1|data$retypoff==8] <- 0
data$practice[data$retypoff==3] <- 1
data$practice[data$retypoff==7] <- 2
data$practice[data$retypoff==2] <- 3
data$practice[data$retypoff==4|data$retypoff==5|data$retypoff==6] <-4

data$solopractice <- NA
data$solopractice[data$solo==1] <- 1
data$solopractice[data$solo==2] <- 0

data$ownership <- NA
data$ownership[data$empstat==1] <- 1
data$ownership[data$empstat==2|data$empstat==3] <- 0

data$prepaid <- 0
data$prepaid[data$retypoff==7|data$owns==2] <- 1
data$prepaid[data$revcapr==3|data$revcapr==4|data$revcaser==3|data$revcaser==4|(data$revcapr==2&data$revcaser==2)] <- 1 

data$hospitalowned <- 0
data$hospitalowned[data$owns==4|data$owns==5] <- 1
data$hospitalowned[data$owns==-9] <- NA 

data$profiling <- NA
data$profiling[data$pccpprof==1] <- 1
data$profiling[data$pccpprof==2] <- 0

data$compresults <- NA
data$compresults[data$eimgres==1] <- 1
data$compresults[data$eimgres==2|data$eimgres==4] <- 0

data$compimages <- NA
data$compimages[data$eimage==1] <- 1
data$compimages[data$eimage==2|data$eimage==4|data$eimage==-7] <- 0

data$advancedimage <- 0
data$advancedimage[data$catscan==1|data$mri==1|data$petscan==1] <- 1

# Regression 1
reg1.data.sub <- data[,c("anyimage","gender","recodedage","black","hispanic","highpoverty","urban","established","insurancetype","specialty","practice","solopractice","ownership","prepaid","hospitalowned","profiling","compresults","patwt","cpsum","cstratm")] 
reg1.data.sub.ld <- na.omit(reg1.data.sub)
reg1.cem.match <- matchit(formula=compresults~gender+recodedage+black+hispanic+highpoverty+urban+established+insurancetype+specialty+practice+solopractice+ownership+prepaid+hospitalowned+profiling, data=reg1.data.sub.ld, method="cem")
reg1.cem.data <- match.data(reg1.cem.match)
reg1.cem.imbalance <- imbalance(group=reg1.cem.data$compresults, data=reg1.cem.data, drop=c("compresults","anyimage","patwt","cpsum","cstratm","distance","weights","subclass"))
reg1.cem.imbalance
# Multivariate Imbalance Measure: L1=0.338
# Percentage of local common support: LCS=100.0%
table.reg1.cem.imbalance <- as.matrix(reg1.cem.imbalance[[1]])
table.reg1.cem.imbalance <- table.reg1.cem.imbalance[,c(1,3:8)]
table.reg1.cem.imbalance <- apply(table.reg1.cem.imbalance,2,as.numeric)
table.reg1.cem.imbalance <- round(table.reg1.cem.imbalance, 3)
rownames(table.reg1.cem.imbalance) <- rownames(reg1.cem.imbalance[[1]])
table.reg1.cem.imbalance
reg1.cem.out <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(profiling)+factor(compresults), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=reg1.cem.data)
reg1.cem.coef <- summary(reg1.cem.out)$coef
reg1.cem.ests <- exp(reg1.cem.coef[,1])
reg1.cem.lci <- exp(reg1.cem.coef[,1] - 1.96 * reg1.cem.coef[,2])
reg1.cem.uci <- exp(reg1.cem.coef[,1] + 1.96 * reg1.cem.coef[,2])
reg1.cem.coef.ci <- cbind(reg1.cem.ests, reg1.cem.lci, reg1.cem.uci)
reg1.cem.coef.ci

# Regression 2
reg2.data.sub <- data[,c("anyimage","gender","recodedage","black","hispanic","highpoverty","urban","established","insurancetype","specialty","practice","solopractice","ownership","prepaid","hospitalowned","profiling","compimages","patwt","cpsum","cstratm")] 
reg2.data.sub.ld <- na.omit(reg2.data.sub)
reg2.cem.match <- matchit(formula=compimages~gender+recodedage+black+hispanic+highpoverty+urban+established+insurancetype+specialty+practice+solopractice+ownership+prepaid+hospitalowned+profiling, data=reg2.data.sub.ld, method="cem")
reg2.cem.data <- match.data(reg2.cem.match)
reg2.cem.imbalance <- imbalance(group=reg2.cem.data$compimages, data=reg2.cem.data, drop=c("compimages","anyimage","patwt","cpsum","cstratm","distance","weights","subclass"))
reg2.cem.imbalance
# Multivariate Imbalance Measure: L1=0.340
# Percentage of local common support: LCS=100.0%
table.reg2.cem.imbalance <- as.matrix(reg2.cem.imbalance[[1]])
table.reg2.cem.imbalance <- table.reg2.cem.imbalance[,c(1,3:8)]
table.reg2.cem.imbalance <- apply(table.reg2.cem.imbalance,2,as.numeric)
table.reg2.cem.imbalance <- round(table.reg2.cem.imbalance, 3)
rownames(table.reg2.cem.imbalance) <- rownames(reg2.cem.imbalance[[1]])
table.reg2.cem.imbalance
reg2.cem.out <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(profiling)+factor(compimages), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=reg2.cem.data)
reg2.cem.coef <- summary(reg2.cem.out)$coef
reg2.cem.ests <- exp(reg2.cem.coef[,1])
reg2.cem.lci <- exp(reg2.cem.coef[,1] - 1.96 * reg2.cem.coef[,2])
reg2.cem.uci <- exp(reg2.cem.coef[,1] + 1.96 * reg2.cem.coef[,2])
reg2.cem.coef.ci <- cbind(reg2.cem.ests, reg2.cem.lci, reg2.cem.uci)
reg2.cem.coef.ci

# Regression 3
reg3.data.sub <- data[,c("advancedimage","gender","recodedage","black","hispanic","highpoverty","urban","established","insurancetype","specialty","practice","solopractice","ownership","prepaid","hospitalowned","profiling","compresults","patwt","cpsum","cstratm")] 
reg3.data.sub.ld <- na.omit(reg3.data.sub)
reg3.cem.match <- matchit(formula=compresults~gender+recodedage+black+hispanic+highpoverty+urban+established+insurancetype+specialty+practice+solopractice+ownership+prepaid+hospitalowned+profiling, data=reg3.data.sub.ld, method="cem")
reg3.cem.data <- match.data(reg3.cem.match)
reg3.cem.imbalance <- imbalance(group=reg3.cem.data$compresults, data=reg3.cem.data, drop=c("compresults","advancedimage","patwt","cpsum","cstratm","distance","weights","subclass"))
reg3.cem.imbalance
# Multivariate Imbalance Measure: L1=0.338
# Percentage of local common support: LCS=100.0%
table.reg3.cem.imbalance <- as.matrix(reg3.cem.imbalance[[1]])
table.reg3.cem.imbalance <- table.reg3.cem.imbalance[,c(1,3:8)]
table.reg3.cem.imbalance <- apply(table.reg3.cem.imbalance,2,as.numeric)
table.reg3.cem.imbalance <- round(table.reg3.cem.imbalance, 3)
rownames(table.reg3.cem.imbalance) <- rownames(reg3.cem.imbalance[[1]])
table.reg3.cem.imbalance
reg3.cem.out <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(profiling)+factor(compresults), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=reg3.cem.data)
reg3.cem.coef <- summary(reg3.cem.out)$coef
reg3.cem.ests <- exp(reg3.cem.coef[,1])
reg3.cem.lci <- exp(reg3.cem.coef[,1] - 1.96 * reg3.cem.coef[,2])
reg3.cem.uci <- exp(reg3.cem.coef[,1] + 1.96 * reg3.cem.coef[,2])
reg3.cem.coef.ci <- cbind(reg3.cem.ests, reg3.cem.lci, reg3.cem.uci)
reg3.cem.coef.ci

# Regression 4
reg4.data.sub <- data[,c("advancedimage","gender","recodedage","black","hispanic","highpoverty","urban","established","insurancetype","specialty","practice","solopractice","ownership","prepaid","hospitalowned","profiling","compimages","patwt","cpsum","cstratm")] 
reg4.data.sub.ld <- na.omit(reg4.data.sub)
reg4.cem.match <- matchit(formula=compimages~gender+recodedage+black+hispanic+highpoverty+urban+established+insurancetype+specialty+practice+solopractice+ownership+prepaid+hospitalowned+profiling, data=reg4.data.sub.ld, method="cem")
reg4.cem.data <- match.data(reg4.cem.match)
reg4.cem.imbalance <- imbalance(group=reg4.cem.data$compimages, data=reg4.cem.data, drop=c("compimages","advancedimage","patwt","cpsum","cstratm","distance","weights","subclass"))
reg4.cem.imbalance
# Multivariate Imbalance Measure: L1=0.340
# Percentage of local common support: LCS=100.0%
table.reg4.cem.imbalance <- as.matrix(reg4.cem.imbalance[[1]])
table.reg4.cem.imbalance <- table.reg4.cem.imbalance[,c(1,3:8)]
table.reg4.cem.imbalance <- apply(table.reg4.cem.imbalance,2,as.numeric)
table.reg4.cem.imbalance <- round(table.reg4.cem.imbalance, 3)
rownames(table.reg4.cem.imbalance) <- rownames(reg4.cem.imbalance[[1]])
table.reg4.cem.imbalance
reg4.cem.out <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(profiling)+factor(compimages), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=reg4.cem.data)
reg4.cem.coef <- summary(reg4.cem.out)$coef
reg4.cem.ests <- exp(reg4.cem.coef[,1])
reg4.cem.lci <- exp(reg4.cem.coef[,1] - 1.96 * reg4.cem.coef[,2])
reg4.cem.uci <- exp(reg4.cem.coef[,1] + 1.96 * reg4.cem.coef[,2])
reg4.cem.coef.ci <- cbind(reg4.cem.ests, reg4.cem.lci, reg4.cem.uci)
reg4.cem.coef.ci

# Summarized output for CEM with list-wise deletion for missing data
summary(reg1.cem.out)    			
round(reg1.cem.coef.ci, digits = 2)		
summary(reg2.cem.out)    					
round(reg2.cem.coef.ci, digits = 2)		
summary(reg3.cem.out)    					
round(reg3.cem.coef.ci, digits = 2)		
summary(reg4.cem.out)    					
round(reg4.cem.coef.ci, digits = 2)		


###########################################################################
## E) CEM with multiple imputation for missing data (2008 data)
###########################################################################

ls()
rm(list = ls())
data <- read.dta("namcs08.dta")
dstrat <- svydesign(id=~cpsum,strata=~cstratm, weights=~patwt, data=data)

data$gender <- NA
data$gender[data$sex==2]<-0
data$gender[data$sex==1]<-1

data$recodedage <- NA
data$recodedage[data$age < 18] <- 0
data$recodedage[data$age >= 18 & data$age <= 45] <- 1
data$recodedage[data$age > 45 & data$age <= 64] <- 2
data$recodedage[data$age > 64] <- 3

data$black <- NA
data$black[data$raceim==2] <- 1
data$black[data$raceim!=2] <- 0

data$hispanic <- NA
data$hispanic[data$ethim==1]<-1
data$hispanic[data$ethim==2]<-0

data$highpoverty <- NA
data$highpoverty[data$pctpovr==3 | data$pctpovr==4] <- 1
data$highpoverty[data$pctpovr==1 | data$pctpovr==2 | data$pctpovr==-9] <- 0

data$urban <- NA
data$urban[data$msa==1] <- 1
data$urban[data$msa==2] <- 0

data$established <- NA
data$established[data$senbefor==1] <- 1
data$established[data$senbefor==2] <- 0

data$insurancetype <- NA
data$insurancetype[data$paytyper==1] <- 0
data$insurancetype[data$paytyper==2] <- 1
data$insurancetype[data$paytyper==3] <- 2
data$insurancetype[data$paytyper==4|data$paytyper==7] <- 3
data$insurancetype[data$paytyper==5|data$paytyper==6] <- 4

data$specialty <- NA
data$specialty[data$speccat==2] <- 0
data$specialty[data$speccat==1] <- 1
data$specialty[data$speccat==3] <- 2

data$practice <- NA
data$practice[data$retypoff==1|data$retypoff==8] <- 0 
data$practice[data$retypoff==3] <- 1
data$practice[data$retypoff==7] <- 2
data$practice[data$retypoff==2] <- 3
data$practice[data$retypoff==4|data$retypoff==5|data$retypoff==6] <-4

data$solopractice <- NA
data$solopractice[data$solo==1] <- 1
data$solopractice[data$solo==2] <- 0

data$ownership <- NA
data$ownership[data$empstat==1] <- 1
data$ownership[data$empstat==2|data$empstat==3] <- 0

data$prepaid <- 0
data$prepaid[data$retypoff==7|data$owns==2] <- 1
data$prepaid[data$revcapr==3|data$revcapr==4|data$revcaser==3|data$revcaser==4|(data$revcapr==2&data$revcaser==2)] <- 1 

data$hospitalowned <- 0
data$hospitalowned[data$owns==4|data$owns==5] <- 1
data$hospitalowned[data$owns==-9] <- NA 

data$profiling <- NA
data$profiling[data$pccpprof==1] <- 1
data$profiling[data$pccpprof==2] <- 0

data$compresults <- NA
data$compresults[data$eimgres==1] <- 1
data$compresults[data$eimgres==2|data$eimgres==4] <- 0

data$compimages <- NA
data$compimages[data$eimage==1] <- 1
data$compimages[data$eimage==2|data$eimage==4|data$eimage==-7] <- 0

data$advancedimage <- 0
data$advancedimage[data$catscan==1|data$mri==1|data$petscan==1] <- 1

# Mutiple imputation for missing data
data.subset <- data[c("anyimage", "advancedimage", "gender", "recodedage", "black", "hispanic", "highpoverty", "urban", "established", "insurancetype", "specialty", "practice", "solopractice", "ownership", "prepaid", "hospitalowned", "profiling", "compresults", "compimages", "patwt", "cpsum", "cstratm")]
data.imputed <- amelia(data.subset, m = 5, ords = c("anyimage", "advancedimage","compresults","compimages"), noms = c("insurancetype", "specialty", "practice"), idvars = c("patwt","cpsum","cstratm"))
# Only 1786 cases have missing data
#Note: Only variables with missing data are 10) "insurancetype", 13) "solopractice", 14) "ownership", 16) "hospitalowned", 17) "compresults", 18) "compimages", and ??) "profiling".

# CEM for imputed data for compresults
first.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling, data=data.imputed$imputations[[1]], method="cem")
first.results.matched <- match.data(first.results)
balance.1r <- imbalance(group=first.results.matched$compresults, data=first.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.1r
# Multivariate Imbalance Measure: L1=0.369
# Percentage of local common support: LCS=66.5%

second.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling, data=data.imputed$imputations[[2]], method="cem")
second.results.matched <- match.data(second.results)
balance.2r <- imbalance(group=second.results.matched$compresults, data=second.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.2r
# Multivariate Imbalance Measure: L1=0.361
# Percentage of local common support: LCS=73.2%

third.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling, data=data.imputed$imputations[[3]], method="cem")
third.results.matched <- match.data(third.results)
balance.3r <- imbalance(group=third.results.matched$compresults, data=third.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.3r
# Multivariate Imbalance Measure: L1=0.361
# Percentage of local common support: LCS=72.4%

fourth.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling, data=data.imputed$imputations[[4]], method="cem")
fourth.results.matched <- match.data(fourth.results)
balance.4r <- imbalance(group=fourth.results.matched$compresults, data=fourth.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.4r
# Multivariate Imbalance Measure: L1=0.362
# Percentage of local common support: LCS=70.0%

fifth.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling, data=data.imputed$imputations[[5]], method="cem")
fifth.results.matched <- match.data(fifth.results)
balance.5r <- imbalance(group=fifth.results.matched$compresults, data=fifth.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.5r
# Multivariate Imbalance Measure: L1=0.360
# Percentage of local common support: LCS=71.8%

# CEM for imputed data for compimages
first.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling, data=data.imputed$imputations[[1]], method="cem")
first.images.matched <- match.data(first.images)
balance.1i <- imbalance(group=first.images.matched$compimages, data=first.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.1i
# Multivariate Imbalance Measure: L1=0.366
# Percentage of local common support: LCS=66.5%

second.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling, data=data.imputed$imputations[[2]], method="cem")
second.images.matched <- match.data(second.images)
balance.2i <- imbalance(group=second.images.matched$compimages, data=second.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.2i
# Multivariate Imbalance Measure: L1=0.356
# Percentage of local common support: LCS=72.5%

third.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling, data=data.imputed$imputations[[3]], method="cem")
third.images.matched <- match.data(third.images)
balance.3i <- imbalance(group=third.images.matched$compimages, data=third.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.3i
# Multivariate Imbalance Measure: L1=0.356
# Percentage of local common support: LCS=72.3%

fourth.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling, data=data.imputed$imputations[[4]], method="cem")
fourth.images.matched <- match.data(fourth.images)
balance.4i <- imbalance(group=fourth.images.matched$compimages, data=fourth.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.4i
# Multivariate Imbalance Measure: L1=0.354
# Percentage of local common support: LCS=70.3%

fifth.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling, data=data.imputed$imputations[[5]], method="cem")
fifth.images.matched <- match.data(fifth.images)
balance.5i <- imbalance(group=fifth.images.matched$compimages, data=fifth.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.5i
# Multivariate Imbalance Measure: L1=0.351
# Percentage of local common support: LCS=72.3%

# Regression 1
reg1.1 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.results.matched)
reg1.2 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.results.matched)
reg1.3 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.results.matched)
reg1.4 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.results.matched)
reg1.5 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.results.matched)
set.seed(02138)
sim.beta.reg1.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.1), sigma=vcov(reg1.1)))
sim.beta.reg1.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.2), sigma=vcov(reg1.2)))
sim.beta.reg1.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.3), sigma=vcov(reg1.3)))
sim.beta.reg1.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.4), sigma=vcov(reg1.4)))
sim.beta.reg1.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.5), sigma=vcov(reg1.5)))
betas.reg1 <- c(sim.beta.reg1.1$compresults, sim.beta.reg1.2$compresults, sim.beta.reg1.3$compresults, sim.beta.reg1.4$compresults, sim.beta.reg1.5$compresults)
reg1.imputed.matched.OR <- exp(mean(betas.reg1))
reg1.imputed.matched.OR.lci <- exp(quantile(betas.reg1, 0.025))
reg1.imputed.matched.OR.uci <- exp(quantile(betas.reg1, 0.975))
reg1.imputed.matched <- cbind(reg1.imputed.matched.OR, reg1.imputed.matched.OR.lci, reg1.imputed.matched.OR.uci)

# Regression 2
reg2.1 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.images.matched)
reg2.2 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.images.matched)
reg2.3 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.images.matched)
reg2.4 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.images.matched)
reg2.5 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.images.matched)
set.seed(02138)
sim.beta.reg2.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.1), sigma=vcov(reg2.1)))
sim.beta.reg2.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.2), sigma=vcov(reg2.2)))
sim.beta.reg2.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.3), sigma=vcov(reg2.3)))
sim.beta.reg2.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.4), sigma=vcov(reg2.4)))
sim.beta.reg2.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.5), sigma=vcov(reg2.5)))
betas.reg2 <- c(sim.beta.reg2.1$compimages, sim.beta.reg2.2$compimages, sim.beta.reg2.3$compimages, sim.beta.reg2.4$compimages, sim.beta.reg2.5$compimages)
reg2.imputed.matched.OR <- exp(mean(betas.reg2))
reg2.imputed.matched.OR.lci <- exp(quantile(betas.reg2, 0.025))
reg2.imputed.matched.OR.uci <- exp(quantile(betas.reg2, 0.975))
reg2.imputed.matched <- cbind(reg2.imputed.matched.OR, reg2.imputed.matched.OR.lci, reg2.imputed.matched.OR.uci)

# Regression 3
reg3.1 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.results.matched)
reg3.2 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.results.matched)
reg3.3 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.results.matched)
reg3.4 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.results.matched)
reg3.5 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.results.matched)
set.seed(02138)
sim.beta.reg3.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.1), sigma=vcov(reg3.1)))
sim.beta.reg3.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.2), sigma=vcov(reg3.2)))
sim.beta.reg3.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.3), sigma=vcov(reg3.3)))
sim.beta.reg3.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.4), sigma=vcov(reg3.4)))
sim.beta.reg3.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.5), sigma=vcov(reg3.5)))
betas.reg3 <- c(sim.beta.reg3.1$compresults, sim.beta.reg3.2$compresults, sim.beta.reg3.3$compresults, sim.beta.reg3.4$compresults, sim.beta.reg3.5$compresults)
reg3.imputed.matched.OR <- exp(mean(betas.reg3))
reg3.imputed.matched.OR.lci <- exp(quantile(betas.reg3, 0.025))
reg3.imputed.matched.OR.uci <- exp(quantile(betas.reg3, 0.975))
reg3.imputed.matched <- cbind(reg3.imputed.matched.OR, reg3.imputed.matched.OR.lci, reg3.imputed.matched.OR.uci)

# Regression 4
reg4.1 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.images.matched)
reg4.2 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.images.matched)
reg4.3 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.images.matched)
reg4.4 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.images.matched)
reg4.5 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+profiling+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.images.matched)
set.seed(02138)
sim.beta.reg4.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.1), sigma=vcov(reg4.1)))
sim.beta.reg4.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.2), sigma=vcov(reg4.2)))
sim.beta.reg4.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.3), sigma=vcov(reg4.3)))
sim.beta.reg4.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.4), sigma=vcov(reg4.4)))
sim.beta.reg4.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.5), sigma=vcov(reg4.5)))
betas.reg4 <- c(sim.beta.reg4.1$compimages, sim.beta.reg4.2$compimages, sim.beta.reg4.3$compimages, sim.beta.reg4.4$compimages, sim.beta.reg4.5$compimages)
reg4.imputed.matched.OR <- exp(mean(betas.reg4))
reg4.imputed.matched.OR.lci <- exp(quantile(betas.reg4, 0.025))
reg4.imputed.matched.OR.uci <- exp(quantile(betas.reg4, 0.975))
reg4.imputed.matched <- cbind(reg4.imputed.matched.OR, reg4.imputed.matched.OR.lci, reg4.imputed.matched.OR.uci)

imputed.CEM.2008 <- rbind(reg1.imputed.matched, reg2.imputed.matched, reg3.imputed.matched, reg4.imputed.matched)
rownames(imputed.CEM.2008) <- rbind("Regression 1", "Regression 2", "Regression 3", "Regression 4")
colnames(imputed.CEM.2008) <- cbind("OR", "lower CI", "upper CI")

# Summarized output for CEM with multiple imputation for missing data
round(imputed.CEM.2008, digits = 2)		# Coefficients and CIs


###########################################################################
## F) Original model on other years' data (2007 data)
###########################################################################

ls()
rm(list = ls())
data <- read.dta("namcs07.dta")
dstrat <- svydesign(id=~cpsum,strata=~cstratm, weights=~patwt, data=data)

data$gender <- NA
data$gender[data$sex==2]<-0
data$gender[data$sex==1]<-1

data$recodedage <- NA
data$recodedage[data$age < 18] <- 0
data$recodedage[data$age >= 18 & data$age <= 45] <- 1
data$recodedage[data$age > 45 & data$age <= 64] <- 2
data$recodedage[data$age > 64] <- 3

data$black <- NA
data$black[data$raceim==2] <- 1
data$black[data$raceim!=2] <- 0

data$hispanic <- NA
data$hispanic[data$ethim==1]<-1
data$hispanic[data$ethim==2]<-0

data$highpoverty <- NA
data$highpoverty[data$pctpovr==3 | data$pctpovr==4] <- 1
data$highpoverty[data$pctpovr==1 | data$pctpovr==2 | data$pctpovr==-9] <- 0

data$urban <- NA
data$urban[data$msa==1] <- 1
data$urban[data$msa==2] <- 0

data$established <- NA
data$established[data$senbefor==1] <- 1
data$established[data$senbefor==2] <- 0

data$insurancetype <- NA
data$insurancetype[data$paytype==1] <- 0
data$insurancetype[data$paytype==2] <- 1
data$insurancetype[data$paytype==3] <- 2
data$insurancetype[data$paytype==4|data$paytype==7] <- 3
data$insurancetype[data$paytype==5|data$paytype==6] <- 4

data$specialty <- NA
data$specialty[data$speccat==2] <- 0
data$specialty[data$speccat==1] <- 1
data$specialty[data$speccat==3] <- 2

data$practice <- NA
data$practice[data$retypoff==1|data$retypoff==8] <- 0
data$practice[data$retypoff==3] <- 1
data$practice[data$retypoff==7] <- 2
data$practice[data$retypoff==2] <- 3
data$practice[data$retypoff==4|data$retypoff==5|data$retypoff==6] <-4

data$solopractice <- NA
data$solopractice[data$solo==1] <- 1
data$solopractice[data$solo==2] <- 0

data$ownership <- NA
data$ownership[data$empstat==1] <- 1
data$ownership[data$empstat==2|data$empstat==3] <- 0

data$prepaid <- 0
data$prepaid[data$retypoff==7|data$owns==2] <- 1
data$prepaid[data$revcapr==3|data$revcapr==4|data$revcaser==3|data$revcaser==4|(data$revcapr==2&data$revcaser==2)] <- 1 

data$hospitalowned <- 0
data$hospitalowned[data$owns==4|data$owns==5] <- 1
data$hospitalowned[data$owns==-9] <- NA 

data$profiling <- NA
data$profiling[data$pccpprof==1] <- 1
data$profiling[data$pccpprof==2] <- 0

data$compresults <- NA
data$compresults[data$eimgres==1] <- 1
data$compresults[data$eimgres==2|data$eimgres==4] <- 0

data$compimages <- NA
data$compimages[data$eimage==1] <- 1
data$compimages[data$eimage==2|data$eimage==4|data$eimage==-7] <- 0

data$advancedimage <- 0
data$advancedimage[data$catscan==1|data$mri==1|data$petscan==1] <- 1

# Regression 1
ex2a <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(profiling)+factor(compresults), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex2a.coef <- summary(ex2a)$coef
ex2a.ests <- exp(ex2a.coef[,1])
ex2a.lower.ci <- exp(ex2a.coef[,1] - 1.96 * ex2a.coef[,2])
ex2a.upper.ci <- exp(ex2a.coef[,1] + 1.96 * ex2a.coef[,2])
ex2a.coef.ci <- cbind(ex2a.ests, ex2a.lower.ci, ex2a.upper.ci)
ex2a.coef.ci

# Regression 2
ex2b <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(profiling)+factor(compimages), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex2b.coef <- summary(ex2b)$coef
ex2b.ests <- exp(ex2b.coef[,1])
ex2b.lower.ci <- exp(ex2b.coef[,1] - 1.96 * ex2b.coef[,2])
ex2b.upper.ci <- exp(ex2b.coef[,1] + 1.96 * ex2b.coef[,2])
ex2b.coef.ci <- cbind(ex2b.ests, ex2b.lower.ci, ex2b.upper.ci)
ex2b.coef.ci

# Our results
ours.ex2a <- as.matrix(exp(ex2a$coefficients[-1]))
ours.ex2b <- as.matrix(exp(ex2b$coefficients[-1]))

# Combined
exhibit2repl <- cbind(ours.ex2a, ours.ex2b)
round(exhibit2repl, digits = 2)

#Regression 3
ex3a <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(profiling)+factor(compresults), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex3a.coef <- summary(ex3a)$coef
ex3a.ests <- exp(ex3a.coef[,1])
ex3a.lower.ci <- exp(ex3a.coef[,1] - 1.96 * ex3a.coef[,2])
ex3a.upper.ci <- exp(ex3a.coef[,1] + 1.96 * ex3a.coef[,2])
ex3a.coef.ci <- cbind(ex3a.ests, ex3a.lower.ci, ex3a.upper.ci)
ex3a.coef.ci

#Regression 4
ex3b <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(profiling)+factor(compimages), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex3b.coef <- summary(ex3b)$coef
ex3b.ests <- exp(ex3b.coef[,1])
ex3b.lower.ci <- exp(ex3b.coef[,1] - 1.96 * ex3b.coef[,2])
ex3b.upper.ci <- exp(ex3b.coef[,1] + 1.96 * ex3b.coef[,2])
ex3b.coef.ci <- cbind(ex3b.ests, ex3b.lower.ci, ex3b.upper.ci)
ex3b.coef.ci

# Our results
ours.ex3a <- as.matrix(exp(ex3a$coefficients[-1]))
ours.ex3b <- as.matrix(exp(ex3b$coefficients[-1]))

# Combined
exhibit3repl <- cbind(ours.ex3a, ours.ex3b)
round(exhibit3repl, digits = 2)

# Summarized output for original model analysis on 2007 data
summary(ex2a)    					#Exhibit 2 significance levels
round(ex2a.coef.ci, digits = 2)		#Exhibit 2 confidence intervals
summary(ex2b)    					#Exhibit 2 significance levels
round(ex2b.coef.ci, digits = 2)		#Exhibit 2 confidence intervals
summary(ex3a)    					#Exhibit 3 significance levels
round(ex3a.coef.ci, digits = 2)		#Exhibit 3 confidence intervals
summary(ex3b)    					#Exhibit 3 significance levels
round(ex3b.coef.ci, digits = 2)		#Exhibit 3 confidence intervals


###########################################################################
## G) Closest possible model on other years' data (2007 data) 
###########################################################################

ls()
rm(list = ls())
data <- read.dta("namcs07.dta")
dstrat <- svydesign(id=~cpsum,strata=~cstratm, weights=~patwt, data=data)

data$gender <- NA
data$gender[data$sex==2]<-0
data$gender[data$sex==1]<-1

data$recodedage <- NA
data$recodedage[data$age < 18] <- 0
data$recodedage[data$age >= 18 & data$age <= 45] <- 1
data$recodedage[data$age > 45 & data$age <= 64] <- 2
data$recodedage[data$age > 64] <- 3

data$black <- NA
data$black[data$raceim==2] <- 1
data$black[data$raceim!=2] <- 0

data$hispanic <- NA
data$hispanic[data$ethim==1]<-1
data$hispanic[data$ethim==2]<-0

data$highpoverty <- NA
data$highpoverty[data$pctpovr==3 | data$pctpovr==4] <- 1
data$highpoverty[data$pctpovr==1 | data$pctpovr==2 | data$pctpovr==-9] <- 0

data$urban <- NA
data$urban[data$msa==1] <- 1
data$urban[data$msa==2] <- 0

data$established <- NA
data$established[data$senbefor==1] <- 1
data$established[data$senbefor==2] <- 0

data$insurancetype <- NA
data$insurancetype[data$paytype==1] <- 0
data$insurancetype[data$paytype==2] <- 1
data$insurancetype[data$paytype==3] <- 2
data$insurancetype[data$paytype==4|data$paytype==7] <- 3
data$insurancetype[data$paytype==5|data$paytype==6] <- 4

data$specialty <- NA
data$specialty[data$speccat==2] <- 0
data$specialty[data$speccat==1] <- 1
data$specialty[data$speccat==3] <- 2

data$practice <- NA
data$practice[data$retypoff==1|data$retypoff==8] <- 0
data$practice[data$retypoff==3] <- 1
data$practice[data$retypoff==7] <- 2
data$practice[data$retypoff==2] <- 3
data$practice[data$retypoff==4|data$retypoff==5|data$retypoff==6] <-4

data$solopractice <- NA
data$solopractice[data$solo==1] <- 1
data$solopractice[data$solo==2] <- 0

data$ownership <- NA
data$ownership[data$empstat==1] <- 1
data$ownership[data$empstat==2|data$empstat==3] <- 0

data$prepaid <- 0
data$prepaid[data$retypoff==7|data$owns==2] <- 1
data$prepaid[data$revcapr==3|data$revcapr==4|data$revcaser==3|data$revcaser==4|(data$revcapr==2&data$revcaser==2)] <- 1 

data$hospitalowned <- 0
data$hospitalowned[data$owns==4|data$owns==5] <- 1
data$hospitalowned[data$owns==-9] <- NA 

data$profiling <- NA
data$profiling[data$pccpprof==1] <- 1
data$profiling[data$pccpprof==2] <- 0

data$compresults <- NA
data$compresults[data$eimgres==1] <- 1
data$compresults[data$eimgres==2|data$eimgres==4] <- 0

data$compimages <- NA
data$compimages[data$eimage==1] <- 1
data$compimages[data$eimage==2|data$eimage==4|data$eimage==-7] <- 0

data$advancedimage <- 0
data$advancedimage[data$catscan==1|data$mri==1] <- 1

# Regression 1
ex2a <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(compresults), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex2a.coef <- summary(ex2a)$coef
ex2a.ests <- exp(ex2a.coef[,1])
ex2a.lower.ci <- exp(ex2a.coef[,1] - 1.96 * ex2a.coef[,2])
ex2a.upper.ci <- exp(ex2a.coef[,1] + 1.96 * ex2a.coef[,2])
ex2a.coef.ci <- cbind(ex2a.ests, ex2a.lower.ci, ex2a.upper.ci)
ex2a.coef.ci

# Regression 2
ex2b <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(compimages), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex2b.coef <- summary(ex2b)$coef
ex2b.ests <- exp(ex2b.coef[,1])
ex2b.lower.ci <- exp(ex2b.coef[,1] - 1.96 * ex2b.coef[,2])
ex2b.upper.ci <- exp(ex2b.coef[,1] + 1.96 * ex2b.coef[,2])
ex2b.coef.ci <- cbind(ex2b.ests, ex2b.lower.ci, ex2b.upper.ci)
ex2b.coef.ci

# Our results
ours.ex2a <- as.matrix(exp(ex2a$coefficients[-1]))
ours.ex2b <- as.matrix(exp(ex2b$coefficients[-1]))

# Combined
exhibit2repl <- cbind(ours.ex2a, ours.ex2b)
round(exhibit2repl, digits = 2)

#Regression 3
ex3a <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(compresults), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex3a.coef <- summary(ex3a)$coef
ex3a.ests <- exp(ex3a.coef[,1])
ex3a.lower.ci <- exp(ex3a.coef[,1] - 1.96 * ex3a.coef[,2])
ex3a.upper.ci <- exp(ex3a.coef[,1] + 1.96 * ex3a.coef[,2])
ex3a.coef.ci <- cbind(ex3a.ests, ex3a.lower.ci, ex3a.upper.ci)
ex3a.coef.ci

#Regression 4
ex3b <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(compimages), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex3b.coef <- summary(ex3b)$coef
ex3b.ests <- exp(ex3b.coef[,1])
ex3b.lower.ci <- exp(ex3b.coef[,1] - 1.96 * ex3b.coef[,2])
ex3b.upper.ci <- exp(ex3b.coef[,1] + 1.96 * ex3b.coef[,2])
ex3b.coef.ci <- cbind(ex3b.ests, ex3b.lower.ci, ex3b.upper.ci)
ex3b.coef.ci

# Our results
ours.ex3a <- as.matrix(exp(ex3a$coefficients[-1]))
ours.ex3b <- as.matrix(exp(ex3b$coefficients[-1]))

# Combined
exhibit3repl <- cbind(ours.ex3a, ours.ex3b)
round(exhibit3repl, digits = 2)

# Summarized output for modified model analysis on 2007 data
summary(ex2a)    					#Exhibit 2 significance levels
round(ex2a.coef.ci, digits = 2)		#Exhibit 2 confidence intervals
summary(ex2b)    					#Exhibit 2 significance levels
round(ex2b.coef.ci, digits = 2)		#Exhibit 2 confidence intervals
summary(ex3a)    					#Exhibit 3 significance levels
round(ex3a.coef.ci, digits = 2)		#Exhibit 3 confidence intervals
summary(ex3b)    					#Exhibit 3 significance levels
round(ex3b.coef.ci, digits = 2)		#Exhibit 3 confidence intervals


###########################################################################
## H) Closest possible model on other years' data (2008 data) 
###########################################################################

ls()
rm(list = ls())
data <- read.dta("namcs08.dta")
dstrat <- svydesign(id=~cpsum,strata=~cstratm, weights=~patwt, data=data)

data$gender <- NA
data$gender[data$sex==2]<-0
data$gender[data$sex==1]<-1

data$recodedage <- NA
data$recodedage[data$age < 18] <- 0
data$recodedage[data$age >= 18 & data$age <= 45] <- 1
data$recodedage[data$age > 45 & data$age <= 64] <- 2
data$recodedage[data$age > 64] <- 3

data$black <- NA
data$black[data$raceim==2] <- 1
data$black[data$raceim!=2] <- 0

data$hispanic <- NA
data$hispanic[data$ethim==1]<-1
data$hispanic[data$ethim==2]<-0

data$highpoverty <- NA
data$highpoverty[data$pctpovr==3 | data$pctpovr==4] <- 1
data$highpoverty[data$pctpovr==1 | data$pctpovr==2 | data$pctpovr==-9] <- 0

data$urban <- NA
data$urban[data$msa==1] <- 1
data$urban[data$msa==2] <- 0

data$established <- NA
data$established[data$senbefor==1] <- 1
data$established[data$senbefor==2] <- 0

data$insurancetype <- NA
data$insurancetype[data$paytyper==1] <- 0
data$insurancetype[data$paytyper==2] <- 1
data$insurancetype[data$paytyper==3] <- 2
data$insurancetype[data$paytyper==4|data$paytyper==7] <- 3
data$insurancetype[data$paytyper==5|data$paytyper==6] <- 4

data$specialty <- NA
data$specialty[data$speccat==2] <- 0
data$specialty[data$speccat==1] <- 1
data$specialty[data$speccat==3] <- 2

data$practice <- NA
data$practice[data$retypoff==1|data$retypoff==8] <- 0
data$practice[data$retypoff==3] <- 1
data$practice[data$retypoff==7] <- 2
data$practice[data$retypoff==2] <- 3
data$practice[data$retypoff==4|data$retypoff==5|data$retypoff==6] <-4

data$solopractice <- NA
data$solopractice[data$solo==1] <- 1
data$solopractice[data$solo==2] <- 0

data$ownership <- NA
data$ownership[data$empstat==1] <- 1
data$ownership[data$empstat==2|data$empstat==3] <- 0

data$prepaid <- 0
data$prepaid[data$retypoff==7|data$owns==2] <- 1
data$prepaid[data$revcapr==3|data$revcapr==4|data$revcaser==3|data$revcaser==4|(data$revcapr==2&data$revcaser==2)] <- 1 

data$hospitalowned <- 0
data$hospitalowned[data$owns==4|data$owns==5] <- 1
data$hospitalowned[data$owns==-9] <- NA 

data$profiling <- NA
data$profiling[data$pccpprof==1] <- 1
data$profiling[data$pccpprof==2] <- 0

data$compresults <- NA
data$compresults[data$eimgres==1] <- 1
data$compresults[data$eimgres==2|data$eimgres==4] <- 0

data$compimages <- NA
data$compimages[data$eimage==1] <- 1
data$compimages[data$eimage==2|data$eimage==4|data$eimage==-7] <- 0

data$advancedimage <- 0
data$advancedimage[data$catscan==1|data$mri==1] <- 1

# Regression 1
ex2a <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(compresults), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex2a.coef <- summary(ex2a)$coef
ex2a.ests <- exp(ex2a.coef[,1])
ex2a.lower.ci <- exp(ex2a.coef[,1] - 1.96 * ex2a.coef[,2])
ex2a.upper.ci <- exp(ex2a.coef[,1] + 1.96 * ex2a.coef[,2])
ex2a.coef.ci <- cbind(ex2a.ests, ex2a.lower.ci, ex2a.upper.ci)
ex2a.coef.ci

# Regression 2
ex2b <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(compimages), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex2b.coef <- summary(ex2b)$coef
ex2b.ests <- exp(ex2b.coef[,1])
ex2b.lower.ci <- exp(ex2b.coef[,1] - 1.96 * ex2b.coef[,2])
ex2b.upper.ci <- exp(ex2b.coef[,1] + 1.96 * ex2b.coef[,2])
ex2b.coef.ci <- cbind(ex2b.ests, ex2b.lower.ci, ex2b.upper.ci)
ex2b.coef.ci

# Our results
ours.ex2a <- as.matrix(exp(ex2a$coefficients[-1]))
ours.ex2b <- as.matrix(exp(ex2b$coefficients[-1]))

# Combined
exhibit2repl <- cbind(ours.ex2a, ours.ex2b)
round(exhibit2repl, digits = 2)

#Regression 3
ex3a <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(compresults), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex3a.coef <- summary(ex3a)$coef
ex3a.ests <- exp(ex3a.coef[,1])
ex3a.lower.ci <- exp(ex3a.coef[,1] - 1.96 * ex3a.coef[,2])
ex3a.upper.ci <- exp(ex3a.coef[,1] + 1.96 * ex3a.coef[,2])
ex3a.coef.ci <- cbind(ex3a.ests, ex3a.lower.ci, ex3a.upper.ci)
ex3a.coef.ci

#Regression 4
ex3b <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(compimages), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex3b.coef <- summary(ex3b)$coef
ex3b.ests <- exp(ex3b.coef[,1])
ex3b.lower.ci <- exp(ex3b.coef[,1] - 1.96 * ex3b.coef[,2])
ex3b.upper.ci <- exp(ex3b.coef[,1] + 1.96 * ex3b.coef[,2])
ex3b.coef.ci <- cbind(ex3b.ests, ex3b.lower.ci, ex3b.upper.ci)
ex3b.coef.ci

# Our results
ours.ex3a <- as.matrix(exp(ex3a$coefficients[-1]))
ours.ex3b <- as.matrix(exp(ex3b$coefficients[-1]))

# Combined
exhibit3repl <- cbind(ours.ex3a, ours.ex3b)
round(exhibit3repl, digits = 2)

# Summarized output for modified model analysis on 2008 data
summary(ex2a)    					#Exhibit 2 significance levels
round(ex2a.coef.ci, digits = 2)		#Exhibit 2 confidence intervals
summary(ex2b)    					#Exhibit 2 significance levels
round(ex2b.coef.ci, digits = 2)		#Exhibit 2 confidence intervals
summary(ex3a)    					#Exhibit 3 significance levels
round(ex3a.coef.ci, digits = 2)		#Exhibit 3 confidence intervals
summary(ex3b)    					#Exhibit 3 significance levels
round(ex3b.coef.ci, digits = 2)		#Exhibit 3 confidence intervals


###########################################################################
## I) Closest possible model on other years' data (2009 data) 
###########################################################################

ls()
rm(list = ls())
data <- read.dta("namcs09.dta")
dstrat <- svydesign(id=~cpsum,strata=~cstratm, weights=~patwt, data=data)

data$gender <- NA
data$gender[data$sex==2]<-0
data$gender[data$sex==1]<-1

data$recodedage <- NA
data$recodedage[data$age < 18] <- 0
data$recodedage[data$age >= 18 & data$age <= 45] <- 1
data$recodedage[data$age > 45 & data$age <= 64] <- 2
data$recodedage[data$age > 64] <- 3

data$black <- NA
data$black[data$racer==2] <- 1
data$black[data$racer!=2] <- 0

data$hispanic <- NA
data$hispanic[data$ethim==1]<-1
data$hispanic[data$ethim==2]<-0

data$highpoverty <- NA
data$highpoverty[data$pctpovr==3 | data$pctpovr==4] <- 1
data$highpoverty[data$pctpovr==1 | data$pctpovr==2 | data$pctpovr==-9] <- 0

data$urban <- NA
data$urban[data$msa==1] <- 1
data$urban[data$msa==2] <- 0

data$established <- NA
data$established[data$senbefor==1] <- 1
data$established[data$senbefor==2] <- 0

data$insurancetype <- NA
data$insurancetype[data$paytyper==1] <- 0
data$insurancetype[data$paytyper==2] <- 1
data$insurancetype[data$paytyper==3] <- 2
data$insurancetype[data$paytyper==4|data$paytyper==7] <- 3
data$insurancetype[data$paytyper==5|data$paytyper==6] <- 4

data$specialty <- NA
data$specialty[data$speccat==2] <- 0
data$specialty[data$speccat==1] <- 1
data$specialty[data$speccat==3] <- 2

data$practice <- NA
data$practice[data$retypoff==1|data$retypoff==8] <- 0
data$practice[data$retypoff==3] <- 1
data$practice[data$retypoff==7] <- 2
data$practice[data$retypoff==2] <- 3
data$practice[data$retypoff==4|data$retypoff==5|data$retypoff==6] <-4

data$solopractice <- NA
data$solopractice[data$solo==1] <- 1
data$solopractice[data$solo==2] <- 0

data$ownership <- NA
data$ownership[data$empstat==1] <- 1
data$ownership[data$empstat==2|data$empstat==3] <- 0

data$prepaid <- 0
data$prepaid[data$retypoff==7|data$owns==2] <- 1
data$prepaid[data$revcapr==3|data$revcapr==4|data$revcaser==3|data$revcaser==4|(data$revcapr==2&data$revcaser==2)] <- 1 

data$hospitalowned <- 0
data$hospitalowned[data$owns==4|data$owns==5] <- 1
data$hospitalowned[data$owns==-9] <- NA 

#data$profiling <- NA
#data$profiling[data$pccpprof==1] <- 1
#data$profiling[data$pccpprof==2] <- 0
#item was dropped from 2009 survey

data$compresults <- NA
data$compresults[data$eimgres==1] <- 1
data$compresults[data$eimgres==2|data$eimgres==4] <- 0

data$compimages <- NA
data$compimages[data$eimage==1] <- 1
data$compimages[data$eimage==2|data$eimage==4|data$eimage==-7] <- 0

data$advancedimage <- 0
data$advancedimage[data$catscan==1|data$mri==1] <- 1

# Regression 1
ex2a <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(compresults), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex2a.coef <- summary(ex2a)$coef
ex2a.ests <- exp(ex2a.coef[,1])
ex2a.lower.ci <- exp(ex2a.coef[,1] - 1.96 * ex2a.coef[,2])
ex2a.upper.ci <- exp(ex2a.coef[,1] + 1.96 * ex2a.coef[,2])
ex2a.coef.ci <- cbind(ex2a.ests, ex2a.lower.ci, ex2a.upper.ci)
ex2a.coef.ci

# Regression 2
ex2b <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(compimages), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex2b.coef <- summary(ex2b)$coef
ex2b.ests <- exp(ex2b.coef[,1])
ex2b.lower.ci <- exp(ex2b.coef[,1] - 1.96 * ex2b.coef[,2])
ex2b.upper.ci <- exp(ex2b.coef[,1] + 1.96 * ex2b.coef[,2])
ex2b.coef.ci <- cbind(ex2b.ests, ex2b.lower.ci, ex2b.upper.ci)
ex2b.coef.ci

# Our results
ours.ex2a <- as.matrix(exp(ex2a$coefficients[-1]))
ours.ex2b <- as.matrix(exp(ex2b$coefficients[-1]))

# Combined
exhibit2repl <- cbind(ours.ex2a, ours.ex2b)
round(exhibit2repl, digits = 2)

#Regression 3
ex3a <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(compresults), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex3a.coef <- summary(ex3a)$coef
ex3a.ests <- exp(ex3a.coef[,1])
ex3a.lower.ci <- exp(ex3a.coef[,1] - 1.96 * ex3a.coef[,2])
ex3a.upper.ci <- exp(ex3a.coef[,1] + 1.96 * ex3a.coef[,2])
ex3a.coef.ci <- cbind(ex3a.ests, ex3a.lower.ci, ex3a.upper.ci)
ex3a.coef.ci

#Regression 4
ex3b <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+factor(solopractice)+factor(ownership)+factor(prepaid)+factor(hospitalowned)+factor(compimages), model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=data)
ex3b.coef <- summary(ex3b)$coef
ex3b.ests <- exp(ex3b.coef[,1])
ex3b.lower.ci <- exp(ex3b.coef[,1] - 1.96 * ex3b.coef[,2])
ex3b.upper.ci <- exp(ex3b.coef[,1] + 1.96 * ex3b.coef[,2])
ex3b.coef.ci <- cbind(ex3b.ests, ex3b.lower.ci, ex3b.upper.ci)
ex3b.coef.ci

# Our results
ours.ex3a <- as.matrix(exp(ex3a$coefficients[-1]))
ours.ex3b <- as.matrix(exp(ex3b$coefficients[-1]))

# Combined
exhibit3repl <- cbind(ours.ex3a, ours.ex3b)
round(exhibit3repl, digits = 2)

# Summarized output for modified model analysis on 2009 data
summary(ex2a)    					#Exhibit 2 significance levels
round(ex2a.coef.ci, digits = 2)		#Exhibit 2 confidence intervals
summary(ex2b)    					#Exhibit 2 significance levels
round(ex2b.coef.ci, digits = 2)		#Exhibit 2 confidence intervals
summary(ex3a)    					#Exhibit 3 significance levels
round(ex3a.coef.ci, digits = 2)		#Exhibit 3 confidence intervals
summary(ex3b)    					#Exhibit 3 significance levels
round(ex3b.coef.ci, digits = 2)		#Exhibit 3 confidence intervals


###########################################################################
## J) CEM with multiple imputation for missing data (2007 data, closest possible model)
###########################################################################

ls()
rm(list = ls())
data <- read.dta("namcs07.dta")
dstrat <- svydesign(id=~cpsum,strata=~cstratm, weights=~patwt, data=data)

data$gender <- NA
data$gender[data$sex==2]<-0
data$gender[data$sex==1]<-1

data$recodedage <- NA
data$recodedage[data$age < 18] <- 0
data$recodedage[data$age >= 18 & data$age <= 45] <- 1
data$recodedage[data$age > 45 & data$age <= 64] <- 2
data$recodedage[data$age > 64] <- 3

data$black <- NA
data$black[data$raceim==2] <- 1
data$black[data$raceim!=2] <- 0

data$hispanic <- NA
data$hispanic[data$ethim==1]<-1
data$hispanic[data$ethim==2]<-0

data$highpoverty <- NA
data$highpoverty[data$pctpovr==3 | data$pctpovr==4] <- 1
data$highpoverty[data$pctpovr==1 | data$pctpovr==2 | data$pctpovr==-9] <- 0

data$urban <- NA
data$urban[data$msa==1] <- 1
data$urban[data$msa==2] <- 0

data$established <- NA
data$established[data$senbefor==1] <- 1
data$established[data$senbefor==2] <- 0

data$insurancetype <- NA
data$insurancetype[data$paytype==1] <- 0 
data$insurancetype[data$paytype==2] <- 1 
data$insurancetype[data$paytype==3] <- 2 
data$insurancetype[data$paytype==4|data$paytype==7] <- 3 
data$insurancetype[data$paytype==5|data$paytype==6] <- 4 

data$specialty <- NA
data$specialty[data$speccat==2] <- 0
data$specialty[data$speccat==1] <- 1
data$specialty[data$speccat==3] <- 2

data$practice <- NA
data$practice[data$retypoff==1|data$retypoff==8] <- 0 
data$practice[data$retypoff==3] <- 1
data$practice[data$retypoff==7] <- 2
data$practice[data$retypoff==2] <- 3
data$practice[data$retypoff==4|data$retypoff==5|data$retypoff==6] <-4

data$solopractice <- NA
data$solopractice[data$solo==1] <- 1
data$solopractice[data$solo==2] <- 0

data$ownership <- NA
data$ownership[data$empstat==1] <- 1
data$ownership[data$empstat==2|data$empstat==3] <- 0

data$prepaid <- 0
data$prepaid[data$retypoff==7|data$owns==2] <- 1
data$prepaid[data$revcapr==3|data$revcapr==4|data$revcaser==3|data$revcaser==4|(data$revcapr==2&data$revcaser==2)] <- 1 

data$hospitalowned <- 0
data$hospitalowned[data$owns==4|data$owns==5] <- 1
data$hospitalowned[data$owns==-9] <- NA 

data$profiling <- NA
data$profiling[data$pccpprof==1] <- 1
data$profiling[data$pccpprof==2] <- 0

data$compresults <- NA
data$compresults[data$eimgres==1] <- 1
data$compresults[data$eimgres==2|data$eimgres==4] <- 0

data$compimages <- NA
data$compimages[data$eimage==1] <- 1
data$compimages[data$eimage==2|data$eimage==4|data$eimage==-7] <- 0

data$advancedimage <- 0
data$advancedimage[data$catscan==1|data$mri==1] <- 1

# Mutiple imputation for missing data
data.subset <- data[c("anyimage", "advancedimage", "gender", "recodedage", "black", "hispanic", "highpoverty", "urban", "established", "insurancetype", "specialty", "practice", "solopractice", "ownership", "prepaid", "hospitalowned", "compresults", "compimages", "patwt", "cpsum", "cstratm")]
data.imputed <- amelia(data.subset, m = 5, ords = c("anyimage", "advancedimage","compresults","compimages"), noms = c("insurancetype", "specialty", "practice"), idvars = c("patwt","cpsum","cstratm"))
# Only 1786 cases have missing data
#Note: Only variables with missing data are 10) "insurancetype", 13) "solopractice", 14) "ownership", 16) "hospitalowned", 17) "compresults", 18) "compimages", and ??) "profiling".

# CEM for imputed data for compresults
first.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[1]], method="cem")
first.results.matched <- match.data(first.results)
balance.1r <- imbalance(group=first.results.matched$compresults, data=first.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.1r
# Multivariate Imbalance Measure: L1=0.333
# Percentage of local common support: LCS=100.0%

second.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[2]], method="cem")
second.results.matched <- match.data(second.results)
balance.2r <- imbalance(group=second.results.matched$compresults, data=second.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.2r
# Multivariate Imbalance Measure: L1=0.336
# Percentage of local common support: LCS=100.0%

third.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[3]], method="cem")
third.results.matched <- match.data(third.results)
balance.3r <- imbalance(group=third.results.matched$compresults, data=third.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.3r
# Multivariate Imbalance Measure: L1=0.335
# Percentage of local common support: LCS=100.0%

fourth.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[4]], method="cem")
fourth.results.matched <- match.data(fourth.results)
balance.4r <- imbalance(group=fourth.results.matched$compresults, data=fourth.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.4r
# Multivariate Imbalance Measure: L1=0.337
# Percentage of local common support: LCS=100.0%

fifth.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[5]], method="cem")
fifth.results.matched <- match.data(fifth.results)
balance.5r <- imbalance(group=fifth.results.matched$compresults, data=fifth.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.5r
# Multivariate Imbalance Measure: L1=0.334
# Percentage of local common support: LCS=100.0%

# CEM for imputed data for compimages
first.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[1]], method="cem")
first.images.matched <- match.data(first.images)
balance.1i <- imbalance(group=first.images.matched$compimages, data=first.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.1i
# Multivariate Imbalance Measure: L1=0.319
# Percentage of local common support: LCS=99.9%

second.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[2]], method="cem")
second.images.matched <- match.data(second.images)
balance.2i <- imbalance(group=second.images.matched$compimages, data=second.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.2i
# Multivariate Imbalance Measure: L1=0.325
# Percentage of local common support: LCS=100.0%

third.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[3]], method="cem")
third.images.matched <- match.data(third.images)
balance.3i <- imbalance(group=third.images.matched$compimages, data=third.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.3i
# Multivariate Imbalance Measure: L1=0.319
# Percentage of local common support: LCS=99.9%

fourth.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[4]], method="cem")
fourth.images.matched <- match.data(fourth.images)
balance.4i <- imbalance(group=fourth.images.matched$compimages, data=fourth.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.4i
# Multivariate Imbalance Measure: L1=0.323
# Percentage of local common support: LCS=99.9%

fifth.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[5]], method="cem")
fifth.images.matched <- match.data(fifth.images)
balance.5i <- imbalance(group=fifth.images.matched$compimages, data=fifth.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.5i
# Multivariate Imbalance Measure: L1=0.325
# Percentage of local common support: LCS=100.0%

# Regression 1
reg1.1 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.results.matched)
reg1.2 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.results.matched)
reg1.3 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.results.matched)
reg1.4 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.results.matched)
reg1.5 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.results.matched)
set.seed(02138)
sim.beta.reg1.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.1), sigma=vcov(reg1.1)))
sim.beta.reg1.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.2), sigma=vcov(reg1.2)))
sim.beta.reg1.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.3), sigma=vcov(reg1.3)))
sim.beta.reg1.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.4), sigma=vcov(reg1.4)))
sim.beta.reg1.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.5), sigma=vcov(reg1.5)))
betas.reg1 <- c(sim.beta.reg1.1$compresults, sim.beta.reg1.2$compresults, sim.beta.reg1.3$compresults, sim.beta.reg1.4$compresults, sim.beta.reg1.5$compresults)
reg1.imputed.matched.OR <- exp(mean(betas.reg1))
reg1.imputed.matched.OR.lci <- exp(quantile(betas.reg1, 0.025))
reg1.imputed.matched.OR.uci <- exp(quantile(betas.reg1, 0.975))
reg1.imputed.matched <- cbind(reg1.imputed.matched.OR, reg1.imputed.matched.OR.lci, reg1.imputed.matched.OR.uci)

# Regression 2
reg2.1 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.images.matched)
reg2.2 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.images.matched)
reg2.3 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.images.matched)
reg2.4 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.images.matched)
reg2.5 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.images.matched)
set.seed(02138)
sim.beta.reg2.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.1), sigma=vcov(reg2.1)))
sim.beta.reg2.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.2), sigma=vcov(reg2.2)))
sim.beta.reg2.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.3), sigma=vcov(reg2.3)))
sim.beta.reg2.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.4), sigma=vcov(reg2.4)))
sim.beta.reg2.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.5), sigma=vcov(reg2.5)))
betas.reg2 <- c(sim.beta.reg2.1$compimages, sim.beta.reg2.2$compimages, sim.beta.reg2.3$compimages, sim.beta.reg2.4$compimages, sim.beta.reg2.5$compimages)
reg2.imputed.matched.OR <- exp(mean(betas.reg2))
reg2.imputed.matched.OR.lci <- exp(quantile(betas.reg2, 0.025))
reg2.imputed.matched.OR.uci <- exp(quantile(betas.reg2, 0.975))
reg2.imputed.matched <- cbind(reg2.imputed.matched.OR, reg2.imputed.matched.OR.lci, reg2.imputed.matched.OR.uci)

# Regression 3
reg3.1 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.results.matched)
reg3.2 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.results.matched)
reg3.3 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.results.matched)
reg3.4 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.results.matched)
reg3.5 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.results.matched)
set.seed(02138)
sim.beta.reg3.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.1), sigma=vcov(reg3.1)))
sim.beta.reg3.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.2), sigma=vcov(reg3.2)))
sim.beta.reg3.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.3), sigma=vcov(reg3.3)))
sim.beta.reg3.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.4), sigma=vcov(reg3.4)))
sim.beta.reg3.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.5), sigma=vcov(reg3.5)))
betas.reg3 <- c(sim.beta.reg3.1$compresults, sim.beta.reg3.2$compresults, sim.beta.reg3.3$compresults, sim.beta.reg3.4$compresults, sim.beta.reg3.5$compresults)
reg3.imputed.matched.OR <- exp(mean(betas.reg3))
reg3.imputed.matched.OR.lci <- exp(quantile(betas.reg3, 0.025))
reg3.imputed.matched.OR.uci <- exp(quantile(betas.reg3, 0.975))
reg3.imputed.matched <- cbind(reg3.imputed.matched.OR, reg3.imputed.matched.OR.lci, reg3.imputed.matched.OR.uci)

# Regression 4
reg4.1 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.images.matched)
reg4.2 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.images.matched)
reg4.3 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.images.matched)
reg4.4 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.images.matched)
reg4.5 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.images.matched)
set.seed(02138)
sim.beta.reg4.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.1), sigma=vcov(reg4.1)))
sim.beta.reg4.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.2), sigma=vcov(reg4.2)))
sim.beta.reg4.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.3), sigma=vcov(reg4.3)))
sim.beta.reg4.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.4), sigma=vcov(reg4.4)))
sim.beta.reg4.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.5), sigma=vcov(reg4.5)))
betas.reg4 <- c(sim.beta.reg4.1$compimages, sim.beta.reg4.2$compimages, sim.beta.reg4.3$compimages, sim.beta.reg4.4$compimages, sim.beta.reg4.5$compimages)
reg4.imputed.matched.OR <- exp(mean(betas.reg4))
reg4.imputed.matched.OR.lci <- exp(quantile(betas.reg4, 0.025))
reg4.imputed.matched.OR.uci <- exp(quantile(betas.reg4, 0.975))
reg4.imputed.matched <- cbind(reg4.imputed.matched.OR, reg4.imputed.matched.OR.lci, reg4.imputed.matched.OR.uci)

imputed.CEM.2007 <- rbind(reg1.imputed.matched, reg2.imputed.matched, reg3.imputed.matched, reg4.imputed.matched)
rownames(imputed.CEM.2007) <- rbind("Regression 1", "Regression 2", "Regression 3", "Regression 4")
colnames(imputed.CEM.2007) <- cbind("OR", "lower CI", "upper CI")

# Summarized output for CEM with multiple imputation for missing data
round(imputed.CEM.2007, digits = 2)		# Coefficients and CIs


###########################################################################
## K) CEM with multiple imputation for missing data (2008 data, closest possible model)
###########################################################################

ls()
rm(list = ls())
data <- read.dta("namcs08.dta")
dstrat <- svydesign(id=~cpsum,strata=~cstratm, weights=~patwt, data=data)

data$gender <- NA
data$gender[data$sex==2]<-0
data$gender[data$sex==1]<-1

data$recodedage <- NA
data$recodedage[data$age < 18] <- 0
data$recodedage[data$age >= 18 & data$age <= 45] <- 1
data$recodedage[data$age > 45 & data$age <= 64] <- 2
data$recodedage[data$age > 64] <- 3

data$black <- NA
data$black[data$raceim==2] <- 1
data$black[data$raceim!=2] <- 0

data$hispanic <- NA
data$hispanic[data$ethim==1]<-1
data$hispanic[data$ethim==2]<-0

data$highpoverty <- NA
data$highpoverty[data$pctpovr==3 | data$pctpovr==4] <- 1
data$highpoverty[data$pctpovr==1 | data$pctpovr==2 | data$pctpovr==-9] <- 0

data$urban <- NA
data$urban[data$msa==1] <- 1
data$urban[data$msa==2] <- 0

data$established <- NA
data$established[data$senbefor==1] <- 1
data$established[data$senbefor==2] <- 0

data$insurancetype <- NA
data$insurancetype[data$paytyper==1] <- 0 
data$insurancetype[data$paytyper==2] <- 1 
data$insurancetype[data$paytyper==3] <- 2 
data$insurancetype[data$paytyper==4|data$paytyper==7] <- 3 
data$insurancetype[data$paytyper==5|data$paytyper==6] <- 4 

data$specialty <- NA
data$specialty[data$speccat==2] <- 0 
data$specialty[data$speccat==1] <- 1 
data$specialty[data$speccat==3] <- 2 

data$practice <- NA
data$practice[data$retypoff==1|data$retypoff==8] <- 0 
data$practice[data$retypoff==3] <- 1 
data$practice[data$retypoff==7] <- 2 
data$practice[data$retypoff==2] <- 3 
data$practice[data$retypoff==4|data$retypoff==5|data$retypoff==6] <-4 

data$solopractice <- NA
data$solopractice[data$solo==1] <- 1
data$solopractice[data$solo==2] <- 0

data$ownership <- NA
data$ownership[data$empstat==1] <- 1
data$ownership[data$empstat==2|data$empstat==3] <- 0

data$prepaid <- 0
data$prepaid[data$retypoff==7|data$owns==2] <- 1
data$prepaid[data$revcapr==3|data$revcapr==4|data$revcaser==3|data$revcaser==4|(data$revcapr==2&data$revcaser==2)] <- 1 

data$hospitalowned <- 0
data$hospitalowned[data$owns==4|data$owns==5] <- 1
data$hospitalowned[data$owns==-9] <- NA 

data$profiling <- NA
data$profiling[data$pccpprof==1] <- 1
data$profiling[data$pccpprof==2] <- 0

data$compresults <- NA
data$compresults[data$eimgres==1] <- 1
data$compresults[data$eimgres==2|data$eimgres==4] <- 0

data$compimages <- NA
data$compimages[data$eimage==1] <- 1
data$compimages[data$eimage==2|data$eimage==4|data$eimage==-7] <- 0

data$advancedimage <- 0
data$advancedimage[data$catscan==1|data$mri==1] <- 1

# Mutiple imputation for missing data
data.subset <- data[c("anyimage", "advancedimage", "gender", "recodedage", "black", "hispanic", "highpoverty", "urban", "established", "insurancetype", "specialty", "practice", "solopractice", "ownership", "prepaid", "hospitalowned", "compresults", "compimages", "patwt", "cpsum", "cstratm")]
data.imputed <- amelia(data.subset, m = 5, ords = c("anyimage", "advancedimage","compresults","compimages"), noms = c("insurancetype", "specialty", "practice"), idvars = c("patwt","cpsum","cstratm"))
# Only 1786 cases have missing data
#Note: Only variables with missing data are 10) "insurancetype", 13) "solopractice", 14) "ownership", 16) "hospitalowned", 17) "compresults", 18) "compimages", and ??) "profiling".

# CEM for imputed data for compresults
first.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[1]], method="cem")
first.results.matched <- match.data(first.results)
balance.1r <- imbalance(group=first.results.matched$compresults, data=first.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.1r
# Multivariate Imbalance Measure: L1=0.303
# Percentage of local common support: LCS=99.9%

second.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[2]], method="cem")
second.results.matched <- match.data(second.results)
balance.2r <- imbalance(group=second.results.matched$compresults, data=second.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.2r
# Multivariate Imbalance Measure: L1=0.302
# Percentage of local common support: LCS=99.8%

third.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[3]], method="cem")
third.results.matched <- match.data(third.results)
balance.3r <- imbalance(group=third.results.matched$compresults, data=third.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.3r
# Multivariate Imbalance Measure: L1=0.303
# Percentage of local common support: LCS=99.7%

fourth.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[4]], method="cem")
fourth.results.matched <- match.data(fourth.results)
balance.4r <- imbalance(group=fourth.results.matched$compresults, data=fourth.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.4r
# Multivariate Imbalance Measure: L1=0.303
# Percentage of local common support: LCS=99.9%

fifth.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[5]], method="cem")
fifth.results.matched <- match.data(fifth.results)
balance.5r <- imbalance(group=fifth.results.matched$compresults, data=fifth.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.5r
# Multivariate Imbalance Measure: L1=0.303
# Percentage of local common support: LCS=99.6%

# CEM for imputed data for compimages
first.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[1]], method="cem")
first.images.matched <- match.data(first.images)
balance.1i <- imbalance(group=first.images.matched$compimages, data=first.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.1i
# Multivariate Imbalance Measure: L1=0.288
# Percentage of local common support: LCS=99.9%

second.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[2]], method="cem")
second.images.matched <- match.data(second.images)
balance.2i <- imbalance(group=second.images.matched$compimages, data=second.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.2i
# Multivariate Imbalance Measure: L1=0.294
# Percentage of local common support: LCS=99.9%

third.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[3]], method="cem")
third.images.matched <- match.data(third.images)
balance.3i <- imbalance(group=third.images.matched$compimages, data=third.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.3i
# Multivariate Imbalance Measure: L1=0.292
# Percentage of local common support: LCS=99.7%

fourth.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[4]], method="cem")
fourth.images.matched <- match.data(fourth.images)
balance.4i <- imbalance(group=fourth.images.matched$compimages, data=fourth.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.4i
# Multivariate Imbalance Measure: L1=0.294
# Percentage of local common support: LCS=99.9%

fifth.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[5]], method="cem")
fifth.images.matched <- match.data(fifth.images)
balance.5i <- imbalance(group=fifth.images.matched$compimages, data=fifth.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.5i
# Multivariate Imbalance Measure: L1=0.293
# Percentage of local common support: LCS=99.6%

# Regression 1
reg1.1 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.results.matched)
reg1.2 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.results.matched)
reg1.3 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.results.matched)
reg1.4 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.results.matched)
reg1.5 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.results.matched)
set.seed(02138)
sim.beta.reg1.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.1), sigma=vcov(reg1.1)))
sim.beta.reg1.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.2), sigma=vcov(reg1.2)))
sim.beta.reg1.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.3), sigma=vcov(reg1.3)))
sim.beta.reg1.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.4), sigma=vcov(reg1.4)))
sim.beta.reg1.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.5), sigma=vcov(reg1.5)))
betas.reg1 <- c(sim.beta.reg1.1$compresults, sim.beta.reg1.2$compresults, sim.beta.reg1.3$compresults, sim.beta.reg1.4$compresults, sim.beta.reg1.5$compresults)
reg1.imputed.matched.OR <- exp(mean(betas.reg1))
reg1.imputed.matched.OR.lci <- exp(quantile(betas.reg1, 0.025))
reg1.imputed.matched.OR.uci <- exp(quantile(betas.reg1, 0.975))
reg1.imputed.matched <- cbind(reg1.imputed.matched.OR, reg1.imputed.matched.OR.lci, reg1.imputed.matched.OR.uci)

# Regression 2
reg2.1 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.images.matched)
reg2.2 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.images.matched)
reg2.3 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.images.matched)
reg2.4 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.images.matched)
reg2.5 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.images.matched)
set.seed(02138)
sim.beta.reg2.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.1), sigma=vcov(reg2.1)))
sim.beta.reg2.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.2), sigma=vcov(reg2.2)))
sim.beta.reg2.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.3), sigma=vcov(reg2.3)))
sim.beta.reg2.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.4), sigma=vcov(reg2.4)))
sim.beta.reg2.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.5), sigma=vcov(reg2.5)))
betas.reg2 <- c(sim.beta.reg2.1$compimages, sim.beta.reg2.2$compimages, sim.beta.reg2.3$compimages, sim.beta.reg2.4$compimages, sim.beta.reg2.5$compimages)
reg2.imputed.matched.OR <- exp(mean(betas.reg2))
reg2.imputed.matched.OR.lci <- exp(quantile(betas.reg2, 0.025))
reg2.imputed.matched.OR.uci <- exp(quantile(betas.reg2, 0.975))
reg2.imputed.matched <- cbind(reg2.imputed.matched.OR, reg2.imputed.matched.OR.lci, reg2.imputed.matched.OR.uci)

# Regression 3
reg3.1 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.results.matched)
reg3.2 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.results.matched)
reg3.3 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.results.matched)
reg3.4 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.results.matched)
reg3.5 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.results.matched)
set.seed(02138)
sim.beta.reg3.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.1), sigma=vcov(reg3.1)))
sim.beta.reg3.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.2), sigma=vcov(reg3.2)))
sim.beta.reg3.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.3), sigma=vcov(reg3.3)))
sim.beta.reg3.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.4), sigma=vcov(reg3.4)))
sim.beta.reg3.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.5), sigma=vcov(reg3.5)))
betas.reg3 <- c(sim.beta.reg3.1$compresults, sim.beta.reg3.2$compresults, sim.beta.reg3.3$compresults, sim.beta.reg3.4$compresults, sim.beta.reg3.5$compresults)
reg3.imputed.matched.OR <- exp(mean(betas.reg3))
reg3.imputed.matched.OR.lci <- exp(quantile(betas.reg3, 0.025))
reg3.imputed.matched.OR.uci <- exp(quantile(betas.reg3, 0.975))
reg3.imputed.matched <- cbind(reg3.imputed.matched.OR, reg3.imputed.matched.OR.lci, reg3.imputed.matched.OR.uci)

# Regression 4
reg4.1 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.images.matched)
reg4.2 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.images.matched)
reg4.3 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.images.matched)
reg4.4 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.images.matched)
reg4.5 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.images.matched)
set.seed(02138)
sim.beta.reg4.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.1), sigma=vcov(reg4.1)))
sim.beta.reg4.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.2), sigma=vcov(reg4.2)))
sim.beta.reg4.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.3), sigma=vcov(reg4.3)))
sim.beta.reg4.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.4), sigma=vcov(reg4.4)))
sim.beta.reg4.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.5), sigma=vcov(reg4.5)))
betas.reg4 <- c(sim.beta.reg4.1$compimages, sim.beta.reg4.2$compimages, sim.beta.reg4.3$compimages, sim.beta.reg4.4$compimages, sim.beta.reg4.5$compimages)
reg4.imputed.matched.OR <- exp(mean(betas.reg4))
reg4.imputed.matched.OR.lci <- exp(quantile(betas.reg4, 0.025))
reg4.imputed.matched.OR.uci <- exp(quantile(betas.reg4, 0.975))
reg4.imputed.matched <- cbind(reg4.imputed.matched.OR, reg4.imputed.matched.OR.lci, reg4.imputed.matched.OR.uci)

imputed.CEM.2008 <- rbind(reg1.imputed.matched, reg2.imputed.matched, reg3.imputed.matched, reg4.imputed.matched)
rownames(imputed.CEM.2008) <- rbind("Regression 1", "Regression 2", "Regression 3", "Regression 4")
colnames(imputed.CEM.2008) <- cbind("OR", "lower CI", "upper CI")

# Summarized output for CEM with multiple imputation for missing data
round(imputed.CEM.2008, digits = 2)		# Coefficients and CIs


###########################################################################
## L) CEM with multiple imputation for missing data (2009 data, closest possible model)
###########################################################################

ls()
rm(list = ls())
data <- read.dta("namcs09.dta")
dstrat <- svydesign(id=~cpsum,strata=~cstratm, weights=~patwt, data=data)

data$gender <- NA
data$gender[data$sex==2]<-0
data$gender[data$sex==1]<-1

data$recodedage <- NA
data$recodedage[data$age < 18] <- 0
data$recodedage[data$age >= 18 & data$age <= 45] <- 1
data$recodedage[data$age > 45 & data$age <= 64] <- 2
data$recodedage[data$age > 64] <- 3

data$black <- NA
data$black[data$racer==2] <- 1
data$black[data$racer!=2] <- 0

data$hispanic <- NA
data$hispanic[data$ethim==1]<-1
data$hispanic[data$ethim==2]<-0

data$highpoverty <- NA
data$highpoverty[data$pctpovr==3 | data$pctpovr==4] <- 1
data$highpoverty[data$pctpovr==1 | data$pctpovr==2 | data$pctpovr==-9] <- 0

data$urban <- NA
data$urban[data$msa==1] <- 1 
data$urban[data$msa==2] <- 0 

data$established <- NA
data$established[data$senbefor==1] <- 1
data$established[data$senbefor==2] <- 0

data$insurancetype <- NA
data$insurancetype[data$paytyper==1] <- 0 
data$insurancetype[data$paytyper==2] <- 1 
data$insurancetype[data$paytyper==3] <- 2 
data$insurancetype[data$paytyper==4|data$paytyper==7] <- 3 
data$insurancetype[data$paytyper==5|data$paytyper==6] <- 4 

data$specialty <- NA
data$specialty[data$speccat==2] <- 0 
data$specialty[data$speccat==1] <- 1 
data$specialty[data$speccat==3] <- 2 

data$practice <- NA
data$practice[data$retypoff==1|data$retypoff==8] <- 0 
data$practice[data$retypoff==3] <- 1 
data$practice[data$retypoff==7] <- 2 
data$practice[data$retypoff==2] <- 3 
data$practice[data$retypoff==4|data$retypoff==5|data$retypoff==6] <-4 

data$solopractice <- NA
data$solopractice[data$solo==1] <- 1
data$solopractice[data$solo==2] <- 0

data$ownership <- NA
data$ownership[data$empstat==1] <- 1
data$ownership[data$empstat==2|data$empstat==3] <- 0

data$prepaid <- 0
data$prepaid[data$retypoff==7|data$owns==2] <- 1
data$prepaid[data$revcapr==3|data$revcapr==4|data$revcaser==3|data$revcaser==4|(data$revcapr==2&data$revcaser==2)] <- 1 

data$hospitalowned <- 0
data$hospitalowned[data$owns==4|data$owns==5] <- 1
data$hospitalowned[data$owns==-9] <- NA 

#data$profiling <- NA
#data$profiling[data$pccpprof==1] <- 1
#data$profiling[data$pccpprof==2] <- 0
#item was dropped from 2009 survey

data$compresults <- NA
data$compresults[data$eimgres==1] <- 1
data$compresults[data$eimgres==2|data$eimgres==4] <- 0

data$compimages <- NA
data$compimages[data$eimage==1] <- 1
data$compimages[data$eimage==2|data$eimage==4|data$eimage==-7] <- 0

data$advancedimage <- 0
data$advancedimage[data$catscan==1|data$mri==1] <- 1

# Mutiple imputation for missing data
data.subset <- data[c("anyimage", "advancedimage", "gender", "recodedage", "black", "hispanic", "highpoverty", "urban", "established", "insurancetype", "specialty", "practice", "solopractice", "ownership", "prepaid", "hospitalowned", "compresults", "compimages", "patwt", "cpsum", "cstratm")]
data.imputed <- amelia(data.subset, m = 5, ords = c("anyimage", "advancedimage","compresults","compimages"), noms = c("insurancetype", "specialty", "practice"), idvars = c("patwt","cpsum","cstratm"))
# Only 1786 cases have missing data
#Note: Only variables with missing data are 10) "insurancetype", 13) "solopractice", 14) "ownership", 16) "hospitalowned", 17) "compresults", 18) "compimages", and ??) "profiling".

# CEM for imputed data for compresults
first.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[1]], method="cem")
first.results.matched <- match.data(first.results)
balance.1r <- imbalance(group=first.results.matched$compresults, data=first.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.1r
# Multivariate Imbalance Measure: L1=0.354
# Percentage of local common support: LCS=98.7%

second.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[2]], method="cem")
second.results.matched <- match.data(second.results)
balance.2r <- imbalance(group=second.results.matched$compresults, data=second.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.2r
# Multivariate Imbalance Measure: L1=0.356
# Percentage of local common support: LCS=99.2%

third.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[3]], method="cem")
third.results.matched <- match.data(third.results)
balance.3r <- imbalance(group=third.results.matched$compresults, data=third.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.3r
# Multivariate Imbalance Measure: L1=0.355
# Percentage of local common support: LCS=99.2%

fourth.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[4]], method="cem")
fourth.results.matched <- match.data(fourth.results)
balance.4r <- imbalance(group=fourth.results.matched$compresults, data=fourth.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.4r
# Multivariate Imbalance Measure: L1=0.354
# Percentage of local common support: LCS=98.5%

fifth.results <- matchit(formula=compresults~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[5]], method="cem")
fifth.results.matched <- match.data(fifth.results)
balance.5r <- imbalance(group=fifth.results.matched$compresults, data=fifth.results.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.5r
# Multivariate Imbalance Measure: L1=0.356
# Percentage of local common support: LCS=99.3%

# CEM for imputed data for compimages
first.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[1]], method="cem")
first.images.matched <- match.data(first.images)
balance.1i <- imbalance(group=first.images.matched$compimages, data=first.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.1i
# Multivariate Imbalance Measure: L1=0.305
# Percentage of local common support: LCS=98.6%

second.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[2]], method="cem")
second.images.matched <- match.data(second.images)
balance.2i <- imbalance(group=second.images.matched$compimages, data=second.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.2i
# Multivariate Imbalance Measure: L1=0.305
# Percentage of local common support: LCS=99.3%

third.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[3]], method="cem")
third.images.matched <- match.data(third.images)
balance.3i <- imbalance(group=third.images.matched$compimages, data=third.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.3i
# Multivariate Imbalance Measure: L1=0.305
# Percentage of local common support: LCS=99.2%

fourth.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[4]], method="cem")
fourth.images.matched <- match.data(fourth.images)
balance.4i <- imbalance(group=fourth.images.matched$compimages, data=fourth.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.4i
# Multivariate Imbalance Measure: L1=0.307
# Percentage of local common support: LCS=98.4%

fifth.images <- matchit(formula=compimages~factor(gender)+factor(recodedage)+factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned, data=data.imputed$imputations[[5]], method="cem")
fifth.images.matched <- match.data(fifth.images)
balance.5i <- imbalance(group=fifth.images.matched$compimages, data=fifth.images.matched, drop=c("compresults","compimages","anyimage","advancedimage","cstratm","patwt","cpsum","distance","weights","subclass"))
balance.5i
# Multivariate Imbalance Measure: L1=0.310
# Percentage of local common support: LCS=99.1%

# Regression 1
reg1.1 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.results.matched)
reg1.2 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.results.matched)
reg1.3 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.results.matched)
reg1.4 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.results.matched)
reg1.5 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.results.matched)
set.seed(02138)
sim.beta.reg1.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.1), sigma=vcov(reg1.1)))
sim.beta.reg1.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.2), sigma=vcov(reg1.2)))
sim.beta.reg1.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.3), sigma=vcov(reg1.3)))
sim.beta.reg1.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.4), sigma=vcov(reg1.4)))
sim.beta.reg1.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg1.5), sigma=vcov(reg1.5)))
betas.reg1 <- c(sim.beta.reg1.1$compresults, sim.beta.reg1.2$compresults, sim.beta.reg1.3$compresults, sim.beta.reg1.4$compresults, sim.beta.reg1.5$compresults)
reg1.imputed.matched.OR <- exp(mean(betas.reg1))
reg1.imputed.matched.OR.lci <- exp(quantile(betas.reg1, 0.025))
reg1.imputed.matched.OR.uci <- exp(quantile(betas.reg1, 0.975))
reg1.imputed.matched <- cbind(reg1.imputed.matched.OR, reg1.imputed.matched.OR.lci, reg1.imputed.matched.OR.uci)

# Regression 2
reg2.1 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.images.matched)
reg2.2 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.images.matched)
reg2.3 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.images.matched)
reg2.4 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.images.matched)
reg2.5 <- zelig(anyimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.images.matched)
set.seed(02138)
sim.beta.reg2.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.1), sigma=vcov(reg2.1)))
sim.beta.reg2.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.2), sigma=vcov(reg2.2)))
sim.beta.reg2.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.3), sigma=vcov(reg2.3)))
sim.beta.reg2.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.4), sigma=vcov(reg2.4)))
sim.beta.reg2.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg2.5), sigma=vcov(reg2.5)))
betas.reg2 <- c(sim.beta.reg2.1$compimages, sim.beta.reg2.2$compimages, sim.beta.reg2.3$compimages, sim.beta.reg2.4$compimages, sim.beta.reg2.5$compimages)
reg2.imputed.matched.OR <- exp(mean(betas.reg2))
reg2.imputed.matched.OR.lci <- exp(quantile(betas.reg2, 0.025))
reg2.imputed.matched.OR.uci <- exp(quantile(betas.reg2, 0.975))
reg2.imputed.matched <- cbind(reg2.imputed.matched.OR, reg2.imputed.matched.OR.lci, reg2.imputed.matched.OR.uci)

# Regression 3
reg3.1 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.results.matched)
reg3.2 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.results.matched)
reg3.3 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.results.matched)
reg3.4 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.results.matched)
reg3.5 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compresults, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.results.matched)
set.seed(02138)
sim.beta.reg3.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.1), sigma=vcov(reg3.1)))
sim.beta.reg3.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.2), sigma=vcov(reg3.2)))
sim.beta.reg3.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.3), sigma=vcov(reg3.3)))
sim.beta.reg3.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.4), sigma=vcov(reg3.4)))
sim.beta.reg3.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg3.5), sigma=vcov(reg3.5)))
betas.reg3 <- c(sim.beta.reg3.1$compresults, sim.beta.reg3.2$compresults, sim.beta.reg3.3$compresults, sim.beta.reg3.4$compresults, sim.beta.reg3.5$compresults)
reg3.imputed.matched.OR <- exp(mean(betas.reg3))
reg3.imputed.matched.OR.lci <- exp(quantile(betas.reg3, 0.025))
reg3.imputed.matched.OR.uci <- exp(quantile(betas.reg3, 0.975))
reg3.imputed.matched <- cbind(reg3.imputed.matched.OR, reg3.imputed.matched.OR.lci, reg3.imputed.matched.OR.uci)

# Regression 4
reg4.1 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=first.images.matched)
reg4.2 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=second.images.matched)
reg4.3 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=third.images.matched)
reg4.4 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fourth.images.matched)
reg4.5 <- zelig(advancedimage ~ factor(gender)+factor(recodedage)+ factor(black)+factor(hispanic)+factor(highpoverty)+factor(urban)+factor(established)+factor(insurancetype)+factor(specialty)+factor(practice)+solopractice+ownership+factor(prepaid)+hospitalowned+compimages, model="logit.survey", weights = ~patwt, ids=~cpsum, strata=~cstratm, data=fifth.images.matched)
set.seed(02138)
sim.beta.reg4.1 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.1), sigma=vcov(reg4.1)))
sim.beta.reg4.2 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.2), sigma=vcov(reg4.2)))
sim.beta.reg4.3 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.3), sigma=vcov(reg4.3)))
sim.beta.reg4.4 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.4), sigma=vcov(reg4.4)))
sim.beta.reg4.5 <- as.data.frame(rmvnorm(1000, mean=coef(reg4.5), sigma=vcov(reg4.5)))
betas.reg4 <- c(sim.beta.reg4.1$compimages, sim.beta.reg4.2$compimages, sim.beta.reg4.3$compimages, sim.beta.reg4.4$compimages, sim.beta.reg4.5$compimages)
reg4.imputed.matched.OR <- exp(mean(betas.reg4))
reg4.imputed.matched.OR.lci <- exp(quantile(betas.reg4, 0.025))
reg4.imputed.matched.OR.uci <- exp(quantile(betas.reg4, 0.975))
reg4.imputed.matched <- cbind(reg4.imputed.matched.OR, reg4.imputed.matched.OR.lci, reg4.imputed.matched.OR.uci)

imputed.CEM.2009 <- rbind(reg1.imputed.matched, reg2.imputed.matched, reg3.imputed.matched, reg4.imputed.matched)
rownames(imputed.CEM.2009) <- rbind("Regression 1", "Regression 2", "Regression 3", "Regression 4")
colnames(imputed.CEM.2009) <- cbind("OR", "lower CI", "upper CI")

# Summarized output for CEM with multiple imputation for missing data
round(imputed.CEM.2009, digits = 2)		# Coefficients and CIs